From 8248c2af94975912b14e7e0cb414fcbb82c77123 Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Mon, 7 Sep 2020 17:15:15 +0200 Subject: [PATCH 001/161] [analyzer][StdLibraryFunctionsChecker] Have proper weak dependencies We want the generice StdLibraryFunctionsChecker to report only if there are no specific checkers that would handle the argument constraint for a function. Note, the assumptions are still evaluated, even if the arguement constraint checker is set to not report. This means that the assumptions made in the generic StdLibraryFunctionsChecker should be an over-approximation of the assumptions made in the specific checkers. But most importantly, the assumptions should not contradict. Differential Revision: https://reviews.llvm.org/D87240 --- .../clang/StaticAnalyzer/Checkers/Checkers.td | 3 +- .../test/Analysis/analyzer-enabled-checkers.c | 2 +- ...c-library-functions-arg-enabled-checkers.c | 66 +++++++++++++++++++ .../std-c-library-functions-arg-weakdeps.c | 64 ++++++++++++++++++ 4 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c create mode 100644 clang/test/Analysis/std-c-library-functions-arg-weakdeps.c diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index a444843c500603..a61af452313486 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -349,7 +349,6 @@ let ParentPackage = APIModeling in { def StdCLibraryFunctionsChecker : Checker<"StdCLibraryFunctions">, HelpText<"Improve modeling of the C standard library functions">, - Dependencies<[CallAndMessageModeling]>, CheckerOptions<[ CmdLineOption, "such as whether the parameter of isalpha is in the range [0, 255] " "or is EOF.">, Dependencies<[StdCLibraryFunctionsChecker]>, - WeakDependencies<[NonNullParamChecker]>, + WeakDependencies<[CallAndMessageChecker, NonNullParamChecker, StreamChecker]>, Documentation; } // end "alpha.unix" diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c index 7c00e78c16acd8..bef786a1a59b6d 100644 --- a/clang/test/Analysis/analyzer-enabled-checkers.c +++ b/clang/test/Analysis/analyzer-enabled-checkers.c @@ -6,11 +6,11 @@ // CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List // CHECK-EMPTY: -// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: apiModeling.StdCLibraryFunctions // CHECK-NEXT: apiModeling.TrustNonnull // CHECK-NEXT: apiModeling.llvm.CastValue // CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.CallAndMessageModeling // CHECK-NEXT: core.CallAndMessage // CHECK-NEXT: core.DivideZero // CHECK-NEXT: core.DynamicTypePropagation diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c new file mode 100644 index 00000000000000..9ad1be05385172 --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c @@ -0,0 +1,66 @@ +// Here we test the order of the Checkers when StdCLibraryFunctionArgs is +// enabled. 
+ +// RUN: %clang --analyze %s --target=x86_64-pc-linux-gnu \ +// RUN: -Xclang -analyzer-checker=core \ +// RUN: -Xclang -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -Xclang -analyzer-config \ +// RUN: -Xclang apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -Xclang -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -Xclang -analyzer-checker=alpha.unix.Stream \ +// RUN: -Xclang -analyzer-list-enabled-checkers \ +// RUN: -Xclang -analyzer-display-progress \ +// RUN: 2>&1 | FileCheck %s --implicit-check-not=ANALYZE \ +// RUN: --implicit-check-not=\. + +// CHECK: OVERVIEW: Clang Static Analyzer Enabled Checkers List +// CHECK-EMPTY: +// CHECK-NEXT: core.CallAndMessageModeling +// CHECK-NEXT: core.CallAndMessage +// CHECK-NEXT: core.NonNullParamChecker +// CHECK-NEXT: alpha.unix.Stream +// CHECK-NEXT: apiModeling.StdCLibraryFunctions +// CHECK-NEXT: alpha.unix.StdCLibraryFunctionArgs +// CHECK-NEXT: apiModeling.TrustNonnull +// CHECK-NEXT: apiModeling.llvm.CastValue +// CHECK-NEXT: apiModeling.llvm.ReturnValue +// CHECK-NEXT: core.DivideZero +// CHECK-NEXT: core.DynamicTypePropagation +// CHECK-NEXT: core.NonnilStringConstants +// CHECK-NEXT: core.NullDereference +// CHECK-NEXT: core.StackAddrEscapeBase +// CHECK-NEXT: core.StackAddressEscape +// CHECK-NEXT: core.UndefinedBinaryOperatorResult +// CHECK-NEXT: core.VLASize +// CHECK-NEXT: core.builtin.BuiltinFunctions +// CHECK-NEXT: core.builtin.NoReturnFunctions +// CHECK-NEXT: core.uninitialized.ArraySubscript +// CHECK-NEXT: core.uninitialized.Assign +// CHECK-NEXT: core.uninitialized.Branch +// CHECK-NEXT: core.uninitialized.CapturedBlockVariable +// CHECK-NEXT: core.uninitialized.UndefReturn +// CHECK-NEXT: deadcode.DeadStores +// CHECK-NEXT: nullability.NullabilityBase +// CHECK-NEXT: nullability.NullPassedToNonnull +// CHECK-NEXT: nullability.NullReturnedFromNonnull +// CHECK-NEXT: security.insecureAPI.SecuritySyntaxChecker +// CHECK-NEXT: security.insecureAPI.UncheckedReturn +// CHECK-NEXT: security.insecureAPI.getpw +// CHECK-NEXT: security.insecureAPI.gets +// CHECK-NEXT: security.insecureAPI.mkstemp +// CHECK-NEXT: security.insecureAPI.mktemp +// CHECK-NEXT: security.insecureAPI.vfork +// CHECK-NEXT: unix.API +// CHECK-NEXT: unix.cstring.CStringModeling +// CHECK-NEXT: unix.DynamicMemoryModeling +// CHECK-NEXT: unix.Malloc +// CHECK-NEXT: unix.MallocSizeof +// CHECK-NEXT: unix.MismatchedDeallocator +// CHECK-NEXT: unix.Vfork +// CHECK-NEXT: unix.cstring.BadSizeArg +// CHECK-NEXT: unix.cstring.NullArg + +int main() { + int i; + (void)(10 / i); +} diff --git a/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c new file mode 100644 index 00000000000000..0ad3c277dfd7de --- /dev/null +++ b/clang/test/Analysis/std-c-library-functions-arg-weakdeps.c @@ -0,0 +1,64 @@ +// Check that the more specific checkers report and not the generic +// StdCLibraryFunctionArgs checker. + +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -triple x86_64-unknown-linux-gnu \ +// RUN: -verify + + +// Make sure that all used functions have their summary loaded. 
+ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=apiModeling.StdCLibraryFunctions \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:ModelPOSIX=true \ +// RUN: -analyzer-checker=alpha.unix.StdCLibraryFunctionArgs \ +// RUN: -analyzer-checker=alpha.unix.Stream \ +// RUN: -analyzer-config apiModeling.StdCLibraryFunctions:DisplayLoadedSummaries=true \ +// RUN: -triple x86_64-unknown-linux 2>&1 | FileCheck %s + +// CHECK: Loaded summary for: int isalnum(int) +// CHECK: Loaded summary for: unsigned long fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))) +// CHECK: Loaded summary for: int fileno(FILE *stream) + +void initializeSummaryMap(); +// We analyze this function first, and the call expression inside initializes +// the summary map. This way we force the loading of the summaries. The +// summaries would not be loaded without this because during the first bug +// report in WeakDependency::checkPreCall we stop further evaluation. And +// StdLibraryFunctionsChecker lazily initializes its summary map from its +// checkPreCall. +void analyzeThisFirst() { + initializeSummaryMap(); +} + +typedef __typeof(sizeof(int)) size_t; +struct FILE; +typedef struct FILE FILE; + +int isalnum(int); +size_t fread(void *restrict, size_t, size_t, FILE *restrict) __attribute__((nonnull(1))); +int fileno(FILE *stream); + +void test_uninit_arg() { + int v; + int r = isalnum(v); // \ + // expected-warning{{1st function call argument is an uninitialized value [core.CallAndMessage]}} + (void)r; +} + +void test_notnull_arg(FILE *F) { + int *p = 0; + fread(p, sizeof(int), 5, F); // \ + expected-warning{{Null pointer passed to 1st parameter expecting 'nonnull' [core.NonNullParamChecker]}} +} + +void test_notnull_stream_arg() { + fileno(0); // \ + // expected-warning{{Stream pointer might be NULL [alpha.unix.Stream]}} +} From eb482afaf5bbf3abf9d02c3810e418945c68a936 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 7 Sep 2020 16:16:52 +0100 Subject: [PATCH 002/161] Reduce the number of memory allocations when displaying a warning about clobbering reserved registers (NFC). Also address some minor inefficiencies and style issues. Differential Revision: https://reviews.llvm.org/D86088 --- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 538107cecd8b3b..57bf500ba89235 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -547,22 +548,23 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); // Emit warnings if we use reserved registers on the clobber list, as - // that might give surprising results. - std::vector RestrRegs; + // that might lead to undefined behaviour. + SmallVector RestrRegs; + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Start with the first operand descriptor, and iterate over them. 
for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); I < NumOps; ++I) { const MachineOperand &MO = MI->getOperand(I); - if (MO.isImm()) { - unsigned Flags = MO.getImm(); - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && - !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { - RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); - } - // Skip to one before the next operand descriptor, if it exists. - I += InlineAsm::getNumOperandRegisters(Flags); + if (!MO.isImm()) + continue; + unsigned Flags = MO.getImm(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber) { + Register Reg = MI->getOperand(I + 1).getReg(); + if (!TRI->isAsmClobberable(*MF, Reg)) + RestrRegs.push_back(Reg); } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); } if (!RestrRegs.empty()) { @@ -572,14 +574,15 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); std::string Msg = "inline asm clobber list contains reserved registers: "; - for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; ++I) { if(I != RestrRegs.begin()) Msg += ", "; - Msg += *I; + Msg += TRI->getName(*I); } - std::string Note = "Reserved registers on the clobber list may not be " - "preserved across the asm statement, and clobbering them may " - "lead to undefined behaviour."; + const char *Note = + "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); } From 2480a31e5d69a5c2e8e900be3a7f706d77f5a5cc Mon Sep 17 00:00:00 2001 From: alex-t Date: Mon, 7 Sep 2020 18:57:27 +0300 Subject: [PATCH 003/161] [AMDGPU] SILowerControlFlow::optimizeEndCF should remove empty basic block optimizeEndCF removes EXEC restoring instruction case this instruction is the only one except the branch to the single successor and that successor contains EXEC mask restoring instruction that was lowered from END_CF belonging to IF_ELSE. As a result of such optimization we get the basic block with the only one instruction that is a branch to the single successor. In case the control flow can reach such an empty block from S_CBRANCH_EXEZ/EXECNZ it might happen that spill/reload instructions that were inserted later by register allocator are placed under exec == 0 condition and never execute. Removing empty block solves the problem. This change require further work to re-implement LIS updates. Recently, LIS is always nullptr in this pass. To enable it we need another patch to fix many places across the codegen. 
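A simplified, illustrative sketch of the problematic shape (not taken from this
patch; block names and the saved-exec value are hypothetical):

  bb.1:
    ...
    S_CBRANCH_EXECZ %bb.3, implicit $exec   ; bb.3 can be entered with exec == 0
  bb.2:
    ...                                     ; conditionally executed code
  bb.3:                                     ; empty: only the branch remains
    S_BRANCH %bb.4                          ; spills/reloads inserted here later by
                                            ; the register allocator would sit under
                                            ; exec == 0 and never execute
  bb.4:
    $exec = S_OR_B64 $exec, %saved_exec, implicit-def $scc

removeMBBifRedundant deletes such a bb.3 and retargets its predecessors to the
single successor (bb.4 here).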
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D86634 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 47 +++++++++++++++++-- llvm/test/CodeGen/AMDGPU/collapse-endcf.mir | 32 +++++-------- 2 files changed, 55 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 0246c6508e9f20..914668f2b68a21 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -113,6 +113,8 @@ class SILowerControlFlow : public MachineFunctionPass { void combineMasks(MachineInstr &MI); + bool removeMBBifRedundant(MachineBasicBlock &MBB); + void process(MachineInstr &MI); // Skip to the next instruction, ignoring debug instructions, and trivial @@ -154,9 +156,6 @@ class SILowerControlFlow : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -604,6 +603,7 @@ void SILowerControlFlow::optimizeEndCf() { if (LIS) LIS->RemoveMachineInstrFromMaps(*MI); MI->eraseFromParent(); + removeMBBifRedundant(MBB); } } } @@ -658,6 +658,47 @@ void SILowerControlFlow::process(MachineInstr &MI) { } } +bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { + bool Redundant = true; + for (auto &I : MBB.instrs()) { + if (!I.isDebugInstr() && !I.isUnconditionalBranch()) + Redundant = false; + } + if (Redundant) { + MachineBasicBlock *Succ = *MBB.succ_begin(); + SmallVector Preds(MBB.predecessors()); + for (auto P : Preds) { + P->replaceSuccessor(&MBB, Succ); + MachineBasicBlock::iterator I(P->getFirstInstrTerminator()); + while (I != P->end()) { + if (I->isBranch()) { + if (TII->getBranchDestBlock(*I) == &MBB) { + I->getOperand(0).setMBB(Succ); + break; + } + } + I++; + } + if (I == P->end()) { + MachineFunction *MF = P->getParent(); + MachineFunction::iterator InsertPt = + P->getNextNode() ? 
MachineFunction::iterator(P->getNextNode()) + : MF->end(); + MF->splice(InsertPt, Succ); + } + } + MBB.removeSuccessor(Succ); + if (LIS) { + for (auto &I : MBB.instrs()) + LIS->RemoveMachineInstrFromMaps(I); + } + MBB.clear(); + MBB.eraseFromParent(); + return true; + } + return false; +} + bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir index d50973c9abf99e..e87f1e7dc8dd03 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir @@ -16,16 +16,13 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) - ; GCN: DBG_VALUE ; GCN: bb.4: ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc ; GCN: DBG_VALUE @@ -68,14 +65,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -118,14 +113,12 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: successors: %bb.4(0x80000000) ; GCN: bb.4: ; GCN: successors: %bb.5(0x80000000) @@ -387,22 +380,19 @@ body: | ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]] ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec ; GCN: bb.1: - ; GCN: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GCN: successors: %bb.2(0x40000000), %bb.5(0x40000000) ; GCN: [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec ; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc ; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_1]] - ; GCN: S_CBRANCH_EXECZ %bb.3, implicit $exec + ; GCN: S_CBRANCH_EXECZ %bb.5, implicit $exec ; GCN: bb.2: - ; GCN: successors: %bb.3(0x80000000) - ; GCN: bb.3: ; GCN: 
successors: %bb.5(0x80000000) - ; GCN: S_BRANCH %bb.5 - ; GCN: bb.4: - ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc - ; GCN: S_ENDPGM 0 ; GCN: bb.5: ; GCN: successors: %bb.4(0x80000000) ; GCN: S_BRANCH %bb.4 + ; GCN: bb.4: + ; GCN: $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc + ; GCN: S_ENDPGM 0 bb.0: successors: %bb.1, %bb.4 From 11d8eedfa5b796a9ba0276a5e4bad8b9e549f0b6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 11:40:59 -0400 Subject: [PATCH 004/161] [InstCombine] move/add tests for icmp with mul operands; NFC --- llvm/test/Transforms/InstCombine/icmp-mul.ll | 311 +++++++++++++++++++ llvm/test/Transforms/InstCombine/icmp.ll | 52 ---- 2 files changed, 311 insertions(+), 52 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 8e7d9056726e48..719150054015ef 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -365,3 +365,314 @@ define i1 @ne_rem_zero_nonuw(i8 %x) { %b = icmp ne i8 %a, 30 ret i1 %b } + +define i1 @mul_constant_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq( +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 5 + %B = mul i32 %y, 5 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_splat( +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i8 %x, 5 + call void @use(i8 %A) + %B = mul i8 %y, 5 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 6 + %B = mul nsw i32 %y, 6 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], 
[[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nsw <2 x i32> %x, + %B = mul nsw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 74 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 74 + call void @use(i8 %A) + %B = mul nsw i8 %y, 74 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 20 +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 20 + %B = mul nsw i8 %y, 20 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nsw_extra_use3( +; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i8 %x, 24 + call void @use(i8 %A) + %B = mul nsw i8 %y, 24 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 22 + %B = mul nuw i32 %y, 22 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul nuw <2 x i32> %x, + %B = mul nuw <2 x i32> %y, + %C = icmp ne <2 x i32> %A, %B + ret <2 x i1> %C +} + +define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 6 +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 6 + call void @use(i8 %A) + %B = mul nuw i8 %y, 6 + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( +; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 36 +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 36 + %B = mul nuw i8 %y, 36 + call void @use(i8 %B) + %C = icmp eq i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { +; CHECK-LABEL: @mul_constant_ne_nuw_extra_use3( +; 
CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[A]]) +; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 +; CHECK-NEXT: call void @use(i8 [[B]]) +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i8 %x, 38 + call void @use(i8 %A) + %B = mul nuw i8 %y, 38 + call void @use(i8 %B) + %C = icmp ne i8 %A, %B + ret i1 %C +} + +define i1 @mul_constant_ult(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_ult( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 +; CHECK-NEXT: [[B:%.*]] = mul i32 [[Y:%.*]], 47 +; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 47 + %B = mul i32 %y, 47 + %C = icmp ult i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_nuw_sgt( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 46 +; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 46 + %C = icmp sgt i32 %A, %B + ret i1 %C +} + +define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( +; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 +; CHECK-NEXT: [[B:%.*]] = mul nuw i32 [[Y:%.*]], 44 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[A]], [[B]] +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nuw i32 %x, 46 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +; If the multiply constant has any trailing zero bits but could overflow, +; we get something completely different. +; We mask off the high bits of each input and then convert: +; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 + +define i1 @mul_constant_partial_nuw_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_partial_nuw_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 44 + %B = mul nuw i32 %y, 44 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @mul_constant_mismatch_wrap_eq(i32 %x, i32 %y) { +; CHECK-LABEL: @mul_constant_mismatch_wrap_eq( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul nsw i32 %x, 54 + %B = mul nuw i32 %y, 54 + %C = icmp eq i32 %A, %B + ret i1 %C +} + +define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 +; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: ret i1 [[C]] +; + %A = mul i32 %x, 12 + %B = mul i32 %y, 12 + %C = icmp ne i32 %A, %B + ret i1 %C +} + +define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @eq_mul_constants_with_tz_splat( +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[C]] +; + %A = mul <2 x i32> %x, + %B = mul <2 x i32> %y, + %C = icmp eq <2 x i32> %A, %B + ret <2 x i1> %C +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index a9bda13e15b905..683518121789cc 100644 --- 
a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -3397,58 +3397,6 @@ define i1 @eq_add_constants(i32 %x, i32 %y) { ret i1 %C } -define i1 @eq_mul_constants(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants( -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 5 - %B = mul i32 %y, 5 - %C = icmp eq i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_splat( -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp ne <2 x i32> %A, %B - ret <2 x i1> %C -} - -; If the multiply constant has any trailing zero bits, we get something completely different. -; We mask off the high bits of each input and then convert: -; (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - -define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823 -; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: ret i1 [[C]] -; - %A = mul i32 %x, 12 - %B = mul i32 %y, 12 - %C = icmp ne i32 %A, %B - ret i1 %C -} - -define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @eq_mul_constants_with_tz_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: ret <2 x i1> [[C]] -; - %A = mul <2 x i32> %x, - %B = mul <2 x i32> %y, - %C = icmp eq <2 x i32> %A, %B - ret <2 x i1> %C -} - declare i32 @llvm.bswap.i32(i32) define i1 @bswap_ne(i32 %x, i32 %y) { From 7a6d6f0f7046f6ebcbf06eaf8f996d991a90e440 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 12:37:59 -0400 Subject: [PATCH 005/161] [InstCombine] improve folds for icmp with multiply operands (PR47432) Check for no overflow along with an odd constant before we lose information by converting to bitwise logic. 
https://rise4fun.com/Alive/2Xl Pre: C1 != 0 %mx = mul nsw i8 %x, C1 %my = mul nsw i8 %y, C1 %r = icmp eq i8 %mx, %my => %r = icmp eq i8 %x, %y Name: nuw ne Pre: C1 != 0 %mx = mul nuw i8 %x, C1 %my = mul nuw i8 %y, C1 %r = icmp ne i8 %mx, %my => %r = icmp ne i8 %x, %y Name: odd ne Pre: C1 % 2 != 0 %mx = mul i8 %x, C1 %my = mul i8 %y, C1 %r = icmp ne i8 %mx, %my => %r = icmp ne i8 %x, %y --- .../InstCombine/InstCombineCompares.cpp | 17 +++++-- llvm/test/Transforms/InstCombine/icmp-mul.ll | 46 ++++++++----------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 350d00095c6f19..608017b6dca251 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3983,6 +3983,19 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, ConstantExpr::getNeg(RHSC)); } + { + // Try to remove shared constant multiplier from equality comparison: + // X * C == Y * C (with no overflowing/aliasing) --> X == Y + Value *X, *Y; + const APInt *C; + if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 && + match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality()) + if (!C->countTrailingZeros() || + (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) || + (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap())) + return new ICmpInst(Pred, X, Y); + } + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -4059,10 +4072,6 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } - // If there are no trailing zeros in the multiplier, just eliminate - // the multiplies (no masking is needed): - // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y - return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; } diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll index 719150054015ef..e2aff1c304adf0 100644 --- a/llvm/test/Transforms/InstCombine/icmp-mul.ll +++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll @@ -392,8 +392,7 @@ define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -405,10 +404,9 @@ define i1 @mul_constant_ne_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul i8 [[X:%.*]], 5 ; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -424,7 +422,7 @@ define i1 @mul_constant_ne_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul i8 [[Y:%.*]], 5 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul i8 %x, 5 @@ -437,9 +435,7 @@ define i1 @mul_constant_ne_extra_use3(i8 %x, 
i8 %y) { define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_eq_nsw( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i32 %x, 6 @@ -450,9 +446,7 @@ define i1 @mul_constant_eq_nsw(i32 %x, i32 %y) { define <2 x i1> @mul_constant_ne_nsw_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @mul_constant_ne_nsw_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = mul nsw <2 x i32> %x, @@ -465,8 +459,7 @@ define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_nsw_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 74 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 74 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 74 @@ -478,10 +471,9 @@ define i1 @mul_constant_ne_nsw_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_nsw_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_nsw_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul nsw i8 [[X:%.*]], 20 ; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 20 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 20 @@ -497,7 +489,7 @@ define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul nsw i8 [[Y:%.*]], 24 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nsw i8 %x, 24 @@ -510,9 +502,7 @@ define i1 @mul_constant_ne_nsw_extra_use3(i8 %x, i8 %y) { define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_nuw_eq( -; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 2147483647 -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i32 %x, 22 @@ -523,9 +513,7 @@ define i1 @mul_constant_nuw_eq(i32 %x, i32 %y) { define <2 x i1> @mul_constant_ne_nuw_splat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @mul_constant_ne_nuw_splat( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], -; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[C]] ; %A = mul nuw <2 x i32> %x, @@ -538,8 +526,7 @@ define i1 @mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_ne_nuw_extra_use1( ; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 6 ; CHECK-NEXT: call void @use(i8 [[A]]) -; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 6 -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y:%.*]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 6 @@ -551,10 +538,9 @@ define i1 
@mul_constant_ne_nuw_extra_use1(i8 %x, i8 %y) { define i1 @mul_constant_eq_nuw_extra_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_constant_eq_nuw_extra_use2( -; CHECK-NEXT: [[A:%.*]] = mul nuw i8 [[X:%.*]], 36 ; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 36 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[X:%.*]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 36 @@ -570,7 +556,7 @@ define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { ; CHECK-NEXT: call void @use(i8 [[A]]) ; CHECK-NEXT: [[B:%.*]] = mul nuw i8 [[Y:%.*]], 38 ; CHECK-NEXT: call void @use(i8 [[B]]) -; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A]], [[B]] +; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[X]], [[Y]] ; CHECK-NEXT: ret i1 [[C]] ; %A = mul nuw i8 %x, 38 @@ -581,6 +567,8 @@ define i1 @mul_constant_ne_nuw_extra_use3(i8 %x, i8 %y) { ret i1 %C } +; Negative test - wrong pred + define i1 @mul_constant_ult(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_ult( ; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 47 @@ -594,6 +582,8 @@ define i1 @mul_constant_ult(i32 %x, i32 %y) { ret i1 %C } +; Negative test - wrong pred + define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_constant_nuw_sgt( ; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 @@ -607,6 +597,8 @@ define i1 @mul_constant_nuw_sgt(i32 %x, i32 %y) { ret i1 %C } +; Negative test - wrong constants + define i1 @mul_mismatch_constant_nuw_eq(i32 %x, i32 %y) { ; CHECK-LABEL: @mul_mismatch_constant_nuw_eq( ; CHECK-NEXT: [[A:%.*]] = mul nuw i32 [[X:%.*]], 46 From 1c34ac03a2de0e10f95f16526296dcae5166d129 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 16:56:57 +0100 Subject: [PATCH 006/161] LeonPasses.h - remove orphan function declarations. NFCI. The implementations no longer exist. --- llvm/lib/Target/Sparc/LeonPasses.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index b165bc93780f65..6baf9fddbe5082 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -33,13 +33,11 @@ class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass protected: LEONMachineFunctionPass(char &ID); - int GetRegIndexForOperand(MachineInstr &MI, int OperandIndex); void clearUsedRegisterList() { UsedRegisters.clear(); } void markRegisterUsed(int registerIndex) { UsedRegisters.push_back(registerIndex); } - int getUnusedFPRegister(MachineRegisterInfo &MRI); }; class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass { From dfc333050b544173741b66f27872cebb2b7ab983 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 17:09:42 +0100 Subject: [PATCH 007/161] LeonPasses.h - remove unnecessary includes. NFCI. Reduce to forward declarations and move includes to LeonPasses.cpp where necessary. 
--- llvm/lib/Target/Sparc/LeonPasses.cpp | 7 +++---- llvm/lib/Target/Sparc/LeonPasses.h | 7 ++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp index e9d3aaeb9cfe23..6ad6940c6b51b2 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.cpp +++ b/llvm/lib/Target/Sparc/LeonPasses.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "LeonPasses.h" -#include "llvm/CodeGen/ISDOpcodes.h" +#include "SparcSubtarget.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID) diff --git a/llvm/lib/Target/Sparc/LeonPasses.h b/llvm/lib/Target/Sparc/LeonPasses.h index 6baf9fddbe5082..9bc4569a12984a 100644 --- a/llvm/lib/Target/Sparc/LeonPasses.h +++ b/llvm/lib/Target/Sparc/LeonPasses.h @@ -12,14 +12,11 @@ #ifndef LLVM_LIB_TARGET_SPARC_LEON_PASSES_H #define LLVM_LIB_TARGET_SPARC_LEON_PASSES_H -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" - -#include "Sparc.h" -#include "SparcSubtarget.h" namespace llvm { +class SparcSubtarget; + class LLVM_LIBRARY_VISIBILITY LEONMachineFunctionPass : public MachineFunctionPass { protected: From 95ca3aacf0f82955e9d259484b886c260337285c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 17:50:58 +0100 Subject: [PATCH 008/161] BTFDebug.h - reduce MachineInstr.h include to forward declaration. NFCI. --- llvm/lib/Target/BPF/BTFDebug.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index db5b5633f6d908..1bad0d11fee4ba 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -16,7 +16,8 @@ #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/DebugHandlerBase.h" -#include "llvm/CodeGen/MachineInstr.h" +#include +#include #include #include #include "BTF.h" @@ -27,9 +28,12 @@ class AsmPrinter; class BTFDebug; class DIType; class GlobalVariable; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MCInst; class MCStreamer; class MCSymbol; -class MachineFunction; /// The base class for BTF type generation. class BTFTypeBase { From 4e89a0ab02148c71d5be076e0d7262e93010006b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 18:15:26 +0100 Subject: [PATCH 009/161] MipsISelLowering.h - remove CCState/CCValAssign forward declarations. NFCI. These are already defined in the CallingConvLower.h include. 
--- llvm/lib/Target/Mips/MipsISelLowering.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h index 0c5df4ba1bade7..03933d82057663 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.h +++ b/llvm/lib/Target/Mips/MipsISelLowering.h @@ -40,8 +40,6 @@ namespace llvm { class Argument; -class CCState; -class CCValAssign; class FastISel; class FunctionLoweringInfo; class MachineBasicBlock; From 5ea9e655efdd1188d9864a6c97a7a9b772559ff5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 7 Sep 2020 18:35:06 +0100 Subject: [PATCH 010/161] VPlan.h - remove unnecessary forward declarations. NFCI. Already defined in includes. --- llvm/lib/Transforms/Vectorize/VPlan.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 078b2ba1c70ac3..9c9e2ec8222d10 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -51,14 +51,12 @@ namespace llvm { class BasicBlock; class DominatorTree; class InnerLoopVectorizer; -template class InterleaveGroup; class LoopInfo; class raw_ostream; class RecurrenceDescriptor; class Value; class VPBasicBlock; class VPRegionBlock; -class VPSlotTracker; class VPlan; class VPlanSlp; From e52e7ad54defa3a95040b680beff2824c9c6fbb7 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 13:57:06 -0400 Subject: [PATCH 011/161] [ms] [llvm-ml] Add support for bitwise named operators (AND, NOT, OR) in MASM Add support for expressions of the form '1 or 2', etc. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86944 --- llvm/lib/MC/MCParser/MasmParser.cpp | 35 +++++++++++++------ .../llvm-ml/named_bitwise_operators.test | 20 +++++++++++ 2 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 llvm/test/tools/llvm-ml/named_bitwise_operators.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 45165ffe3cac00..94cef83bc405ed 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -1314,7 +1314,7 @@ bool MasmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' -/// primaryexpr ::= ~,+,- primaryexpr +/// primaryexpr ::= ~,+,-,'not' primaryexpr bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); @@ -1352,6 +1352,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return Error(FirstTokenLoc, "invalid token in expression"); } } + // Parse named bitwise negation. + if (Identifier.equals_lower("not")) { + if (parsePrimaryExpr(Res, EndLoc)) + return true; + Res = MCUnaryExpr::createNot(Res, getContext(), FirstTokenLoc); + return false; + } // Parse symbol variant. 
std::pair Split; if (!MAI.useParensForSymbolVariant()) { @@ -1772,8 +1779,18 @@ bool MasmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { SMLoc StartLoc = Lexer.getLoc(); while (true) { + AsmToken::TokenKind TokKind = Lexer.getKind(); + if (Lexer.getKind() == AsmToken::Identifier) { + StringRef Identifier = Lexer.getTok().getString(); + if (Identifier.equals_lower("and")) + TokKind = AsmToken::Amp; + else if (Identifier.equals_lower("not")) + TokKind = AsmToken::Exclaim; + else if (Identifier.equals_lower("or")) + TokKind = AsmToken::Pipe; + } MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; - unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind); + unsigned TokPrec = getBinOpPrecedence(TokKind, Kind); // If the next token is lower precedence than we are allowed to eat, return // successfully with what we ate already. @@ -3229,7 +3246,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size, Lex(); } else { const MCExpr *Value; - if (checkForValidSection() || parseExpression(Value)) + if (parseExpression(Value)) return true; if (getTok().is(AsmToken::Identifier) && getTok().getString().equals_lower("dup")) { @@ -3449,6 +3466,9 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics, // Initialize real data values. bool MasmParser::emitRealValues(const fltSemantics &Semantics) { + if (checkForValidSection()) + return true; + SmallVector ValuesAsInt; if (parseRealInstList(Semantics, ValuesAsInt)) return true; @@ -3468,8 +3488,7 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { Field.SizeOf = 0; - if (checkForValidSection() || - parseRealInstList(Semantics, RealInfo.AsIntValues)) + if (parseRealInstList(Semantics, RealInfo.AsIntValues)) return true; Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; @@ -3486,9 +3505,6 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics) { - if (checkForValidSection()) - return true; - if (StructInProgress.empty()) { // Initialize data value. if (emitRealValues(Semantics)) @@ -3504,9 +3520,6 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, StringRef Name, SMLoc NameLoc) { - if (checkForValidSection()) - return true; - if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); diff --git a/llvm/test/tools/llvm-ml/named_bitwise_operators.test b/llvm/test/tools/llvm-ml/named_bitwise_operators.test new file mode 100644 index 00000000000000..f122dbe842d0f7 --- /dev/null +++ b/llvm/test/tools/llvm-ml/named_bitwise_operators.test @@ -0,0 +1,20 @@ +; RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data + +t1 BYTE NOT 1 +; CHECK: t1: +; CHECK-NEXT: .byte -2 + +t2 BYTE 1 OR 2 +; CHECK: t2: +; CHECK-NEXT: .byte 3 + +t3 BYTE 6 AND 10 +; CHECK: t3: +; CHECK-NEXT: .byte 2 + +.code +xor eax, eax + +END From 2feb6e9b8418b29c002bc830a3e2fdcbe9e39449 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 13:58:55 -0400 Subject: [PATCH 012/161] [ms] [llvm-ml] Fix STRUCT field alignment MASM aligns fields to the _minimum_ of the STRUCT alignment value and the size of the next field. 
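As a rough illustration (a hypothetical struct, not taken from this patch), a
field's offset is now aligned to the minimum of the STRUCT alignment and the
field's own size:

  FOO STRUCT 2
    a BYTE 1    ; offset 0
    b BYTE 2    ; offset 1: aligned to min(2, 1) = 1, so no padding byte
    c WORD 3    ; offset 2: aligned to min(2, 2) = 2
  FOO ENDS

Under the previous behaviour, b would have been padded out to offset 2 (the full
STRUCT alignment); the updated struct.test offsets below reflect the new rule.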
Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86945 --- llvm/lib/MC/MCParser/MasmParser.cpp | 48 ++++++++++++++++------------- llvm/test/tools/llvm-ml/struct.test | 32 +++++++++---------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 94cef83bc405ed..333eef2f698fda 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -127,7 +127,7 @@ struct StructInfo { std::vector Fields; StringMap FieldsByName; - FieldInfo &addField(StringRef FieldName, FieldType FT); + FieldInfo &addField(StringRef FieldName, FieldType FT, size_t FieldSize); StructInfo() = default; @@ -330,7 +330,8 @@ struct FieldInfo { FieldInfo(FieldType FT) : Contents(FT) {} }; -FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { +FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, + size_t FieldSize) { if (!FieldName.empty()) FieldsByName[FieldName] = Fields.size(); Fields.emplace_back(FT); @@ -338,7 +339,7 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { if (IsUnion) { Field.Offset = 0; } else { - Size = llvm::alignTo(Size, Alignment); + Size = llvm::alignTo(Size, std::min(Alignment, FieldSize)); Field.Offset = Size; } return Field; @@ -759,13 +760,14 @@ class MasmParser : public MCAsmParser { // "real4", "real8" bool emitRealValues(const fltSemantics &Semantics); - bool addRealField(StringRef Name, const fltSemantics &Semantics); - bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics); + bool addRealField(StringRef Name, const fltSemantics &Semantics, size_t Size); + bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics, + size_t Size); bool parseRealInstList( const fltSemantics &Semantics, SmallVectorImpl &Values, const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); bool parseDirectiveNamedRealValue(StringRef IDVal, - const fltSemantics &Semantics, + const fltSemantics &Semantics, size_t Size, StringRef Name, SMLoc NameLoc); bool parseOptionalAngleBracketOpen(); @@ -2118,9 +2120,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, case DK_DQ: return parseDirectiveValue(IDVal, 8); case DK_REAL4: - return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle(), 4); case DK_REAL8: - return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble()); + return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble(), 8); case DK_STRUCT: case DK_UNION: return parseDirectiveNestedStruct(IDVal, DirKind); @@ -2343,12 +2345,12 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveNamedValue(nextVal, 8, IDVal, IDLoc); case DK_REAL4: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEsingle(), 4, + IDVal, IDLoc); case DK_REAL8: Lex(); - return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal, - IDLoc); + return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), 8, + IDVal, IDLoc); case DK_STRUCT: case DK_UNION: Lex(); @@ -3306,7 +3308,7 @@ bool MasmParser::emitIntegralValues(unsigned Size) { // Add a field to the current structure. 
bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL); + FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL, Size); IntFieldInfo &IntInfo = Field.Contents.IntInfo; Field.Type = Size; @@ -3481,9 +3483,10 @@ bool MasmParser::emitRealValues(const fltSemantics &Semantics) { } // Add a real field to the current struct. -bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { +bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics, + size_t Size) { StructInfo &Struct = StructInProgress.back(); - FieldInfo &Field = Struct.addField(Name, FT_REAL); + FieldInfo &Field = Struct.addField(Name, FT_REAL, Size); RealFieldInfo &RealInfo = Field.Contents.RealInfo; Field.SizeOf = 0; @@ -3504,12 +3507,13 @@ bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { /// parseDirectiveRealValue /// ::= (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveRealValue(StringRef IDVal, - const fltSemantics &Semantics) { + const fltSemantics &Semantics, + size_t Size) { if (StructInProgress.empty()) { // Initialize data value. if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField("", Semantics)) { + } else if (addRealField("", Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3519,14 +3523,15 @@ bool MasmParser::parseDirectiveRealValue(StringRef IDVal, /// ::= name (real4 | real8) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, - StringRef Name, SMLoc NameLoc) { + size_t Size, StringRef Name, + SMLoc NameLoc) { if (StructInProgress.empty()) { // Initialize named data value. MCSymbol *Sym = getContext().getOrCreateSymbol(Name); getStreamer().emitLabel(Sym); if (emitRealValues(Semantics)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - } else if (addRealField(Name, Semantics)) { + } else if (addRealField(Name, Semantics, Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); } return false; @@ -3956,7 +3961,7 @@ bool MasmParser::emitStructValues(const StructInfo &Structure) { // Declare a field in the current struct. 
bool MasmParser::addStructField(StringRef Name, const StructInfo &Structure) { StructInfo &OwningStruct = StructInProgress.back(); - FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT); + FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; StructInfo.Structure = Structure; @@ -4130,7 +4135,8 @@ bool MasmParser::parseDirectiveNestedEnds() { else ParentStruct.Size += Structure.Size; } else { - FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + FieldInfo &Field = + ParentStruct.addField(Structure.Name, FT_STRUCT, Structure.Size); StructFieldInfo &StructInfo = Field.Contents.StructInfo; Field.Type = Structure.Size; Field.LengthOf = 1; diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test index fa85ecd455dda5..38fc763fc7e1f4 100644 --- a/llvm/test/tools/llvm-ml/struct.test +++ b/llvm/test/tools/llvm-ml/struct.test @@ -34,11 +34,9 @@ t1 foobar <> ; CHECK-NEXT: .byte 1 ; CHECK-NEXT: .byte 2 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 6 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "abcde", plus alignment padding ; CHECK-NEXT: .byte 97 @@ -65,11 +63,9 @@ t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> ; CHECK-NEXT: .byte 10 ; CHECK-NEXT: .byte 11 ; -; , with internal alignment padding +; , with no alignment padding (field size < alignment) ; CHECK-NEXT: .byte 12 -; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .byte 7 -; CHECK-NEXT: .zero 1 ; ; BYTE "ijk", padded with " ", plus alignment padding ; CHECK-NEXT: .byte 105 @@ -87,16 +83,16 @@ mov eax, [t2].f.h mov eax, [t2.f.h] ; CHECK: t3: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t4: mov eax, j.FOOBAR.f.h mov eax, j.baz.b ; CHECK: t4: -; CHECK-NEXT: mov eax, dword ptr [rip + j+12] +; CHECK-NEXT: mov eax, dword ptr [rip + j+11] ; CHECK-NEXT: mov eax, dword ptr [rip + j+1] t5: @@ -105,9 +101,9 @@ mov eax, [ebx.FOOBAR].f.h mov eax, [ebx.FOOBAR.f.h] ; CHECK: t5: -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] -; CHECK-NEXT: mov eax, dword ptr [ebx + 12] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] +; CHECK-NEXT: mov eax, dword ptr [ebx + 11] t6: mov eax, t2.FOOBAR.f.h @@ -116,10 +112,10 @@ mov eax, [t2.FOOBAR].f.h mov eax, [t2.FOOBAR.f.h] ; CHECK: t6: -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] -; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+11] t7: mov eax, [ebx].FOOBAR.e.b @@ -185,7 +181,7 @@ mov eax, FOOBAR.f.h ; CHECK: t10: ; CHECK-NEXT: mov eax, 10 -; CHECK-NEXT: mov eax, 12 +; CHECK-NEXT: mov eax, 11 t11: mov eax, (FOOBAR PTR [ebx]).f From a3ec4a3158f3a60c16ac1e3550667866fe1d4171 Mon Sep 17 00:00:00 2001 From: Eric Astor Date: Mon, 7 Sep 2020 14:00:05 -0400 Subject: [PATCH 013/161] [ms] [llvm-ml] Allow use of locally-defined variables in expressions MASM allows variables defined by equate statements to be used in 
expressions. Reviewed By: thakis Differential Revision: https://reviews.llvm.org/D86946 --- llvm/lib/MC/MCParser/MasmParser.cpp | 5 +++++ llvm/test/tools/llvm-ml/variable.test | 13 +++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 llvm/test/tools/llvm-ml/variable.test diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 333eef2f698fda..4d62174f7e5e46 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -3076,6 +3076,11 @@ bool MasmParser::parseDirectiveEquate(StringRef IDVal, StringRef Name, SMLoc EndLoc, StartLoc = Lexer.getLoc(); if (parseExpression(Expr, EndLoc)) return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + MCSymbol *Sym = getContext().getOrCreateSymbol(Var.Name); + Sym->setRedefinable(Var.Redefinable); + Sym->setVariableValue(Expr); + Sym->setExternal(false); + if (Expr->evaluateAsAbsolute(Var.NumericValue, getStreamer().getAssemblerPtr())) return false; diff --git a/llvm/test/tools/llvm-ml/variable.test b/llvm/test/tools/llvm-ml/variable.test new file mode 100644 index 00000000000000..4e89d67bd59dd7 --- /dev/null +++ b/llvm/test/tools/llvm-ml/variable.test @@ -0,0 +1,13 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data +t1_value equ 1 or 2 + +t1 BYTE t1_value DUP (0) +; CHECK: t1: +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .byte 0 +; CHECK-NOT: .byte 0 + +END From 7a06b166b1afb457a7df6ad73a6710b4dde4db68 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 14:11:06 -0400 Subject: [PATCH 014/161] [DAGCombiner] allow more store merging for non-i8 truncated ops This is a follow-up suggested in D86420 - if we have a pair of stores in inverted order for the target endian, we can rotate the source bits into place. The "be_i64_to_i16_order" test shows a limitation of the current function (which might be avoided if we integrate this function with the other cases in mergeConsecutiveStores). In the earlier "be_i64_to_i16" test, we skip the first 2 stores because we do not match the full set as consecutive or rotate-able, but then we reach the last 2 stores and see that they are an inverted pair of 16-bit stores. The "be_i64_to_i16_order" test alters the program order of the stores, so we miss matching the sub-pattern. Differential Revision: https://reviews.llvm.org/D87112 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++-- .../test/CodeGen/AArch64/merge-trunc-store.ll | 49 +++++++++---------- llvm/test/CodeGen/X86/stores-merging.ll | 22 +++------ 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 286d54386357f7..37d8cdd695445f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7011,12 +7011,15 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check if the offsets line up for the native data layout of this target. bool NeedBswap = false; + bool NeedRotate = false; if (!checkOffsets(Layout.isLittleEndian())) { // Special-case: check if byte offsets line up for the opposite endian. - // TODO: We could use rotates for 16/32-bit merge pairs. 
- if (NarrowNumBits != 8 || !checkOffsets(Layout.isBigEndian())) + if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian())) + NeedBswap = true; + else if (NumStores == 2 && checkOffsets(Layout.isBigEndian())) + NeedRotate = true; + else return SDValue(); - NeedBswap = true; } SDLoc DL(N); @@ -7026,11 +7029,16 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue); } - // Before legalize we can introduce illegal bswaps which will be later + // Before legalize we can introduce illegal bswaps/rotates which will be later // converted to an explicit bswap sequence. This way we end up with a single // store and byte shuffling instead of several stores and byte shuffling. - if (NeedBswap) + if (NeedBswap) { SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue); + } else if (NeedRotate) { + assert(WideNumBits % 2 == 0 && "Unexpected type for rotate"); + SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT); + SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt); + } SDValue NewStore = DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(), diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll index 3f8fa3e9e38379..b4c6e7736837a2 100644 --- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll +++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll @@ -207,9 +207,8 @@ define void @le_i32_to_i16(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w0, [x1] -; BE-NEXT: strh w8, [x1, #2] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -228,9 +227,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { ; ; BE-LABEL: le_i32_to_i16_order: ; BE: // %bb.0: -; BE-NEXT: lsr w8, w0, #16 -; BE-NEXT: strh w8, [x1, #2] -; BE-NEXT: strh w0, [x1] +; BE-NEXT: ror w8, w0, #16 +; BE-NEXT: str w8, [x1] ; BE-NEXT: ret %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -244,9 +242,8 @@ define void @le_i32_to_i16_order(i32 %x, i16* %p0) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w0, [x1, #2] -; LE-NEXT: strh w8, [x1] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16: @@ -265,9 +262,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; LE-LABEL: be_i32_to_i16_order: ; LE: // %bb.0: -; LE-NEXT: lsr w8, w0, #16 -; LE-NEXT: strh w8, [x1] -; LE-NEXT: strh w0, [x1, #2] +; LE-NEXT: ror w8, w0, #16 +; LE-NEXT: str w8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i32_to_i16_order: @@ -528,13 +524,12 @@ define void @le_i64_to_i16_order(i64 %x, i16* %p0) { define void @be_i64_to_i16(i64 %x, i16* %p0) { ; LE-LABEL: be_i64_to_i16: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #16 -; LE-NEXT: lsr x9, x0, #32 -; LE-NEXT: lsr x10, x0, #48 -; LE-NEXT: strh w0, [x1, #6] -; LE-NEXT: strh w8, [x1, #4] -; LE-NEXT: strh w9, [x1, #2] -; LE-NEXT: strh w10, [x1] +; LE-NEXT: lsr x8, x0, #32 +; LE-NEXT: lsr x9, x0, #48 +; LE-NEXT: ror w10, w0, #16 +; LE-NEXT: str w10, [x1, #4] +; LE-NEXT: strh w8, [x1, #2] +; LE-NEXT: strh w9, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i16: @@ -599,8 +594,8 @@ define void @le_i64_to_i32(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: 
str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -619,8 +614,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { ; ; BE-LABEL: le_i64_to_i32_order: ; BE: // %bb.0: -; BE-NEXT: lsr x8, x0, #32 -; BE-NEXT: stp w0, w8, [x1] +; BE-NEXT: ror x8, x0, #32 +; BE-NEXT: str x8, [x1] ; BE-NEXT: ret %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -634,8 +629,8 @@ define void @le_i64_to_i32_order(i64 %x, i32* %p0) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32: @@ -654,8 +649,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; LE-LABEL: be_i64_to_i32_order: ; LE: // %bb.0: -; LE-NEXT: lsr x8, x0, #32 -; LE-NEXT: stp w8, w0, [x1] +; LE-NEXT: ror x8, x0, #32 +; LE-NEXT: str x8, [x1] ; LE-NEXT: ret ; ; BE-LABEL: be_i64_to_i32_order: diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll index 6d6796d1c902dc..14dd43ed71a463 100644 --- a/llvm/test/CodeGen/X86/stores-merging.ll +++ b/llvm/test/CodeGen/X86/stores-merging.ll @@ -482,9 +482,8 @@ define void @trunc_i32_to_i16(i32 %x, i16* %p) { define void @be_i32_to_i16(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: movw %di, 2(%rsi) -; CHECK-NEXT: shrl $16, %edi -; CHECK-NEXT: movw %di, (%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -498,10 +497,8 @@ define void @be_i32_to_i16(i32 %x, i16* %p0) { define void @be_i32_to_i16_order(i32 %x, i16* %p0) { ; CHECK-LABEL: be_i32_to_i16_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shrl $16, %eax -; CHECK-NEXT: movw %ax, (%rsi) -; CHECK-NEXT: movw %di, 2(%rsi) +; CHECK-NEXT: rorl $16, %edi +; CHECK-NEXT: movl %edi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i32 %x, 16 %t0 = trunc i32 %x to i16 @@ -589,9 +586,8 @@ define void @trunc_i64_to_i32(i64 %x, i32* %p) { define void @be_i64_to_i32(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, 4(%rsi) -; CHECK-NEXT: shrq $32, %rdi -; CHECK-NEXT: movl %edi, (%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 @@ -605,10 +601,8 @@ define void @be_i64_to_i32(i64 %x, i32* %p0) { define void @be_i64_to_i32_order(i64 %x, i32* %p0) { ; CHECK-LABEL: be_i64_to_i32_order: ; CHECK: # %bb.0: -; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movl %eax, (%rsi) -; CHECK-NEXT: movl %edi, 4(%rsi) +; CHECK-NEXT: rorq $32, %rdi +; CHECK-NEXT: movq %rdi, (%rsi) ; CHECK-NEXT: retq %sh1 = lshr i64 %x, 32 %t0 = trunc i64 %x to i32 From f3a6f6ccfddfbd991269a917feb4ae9beb5a1610 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 10:41:05 -0700 Subject: [PATCH 015/161] [X86] Pre-commit new test case for D87214. 
NFC --- llvm/test/CodeGen/X86/iabs.ll | 85 +++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 338e66622dcd95..d9fc452510c784 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -120,3 +120,88 @@ define i64 @test_i64(i64 %a) nounwind { ret i64 %abs } +define i128 @test_i128(i128 %a) nounwind { +; X86-NO-CMOV-LABEL: test_i128: +; X86-NO-CMOV: # %bb.0: +; X86-NO-CMOV-NEXT: pushl %ebp +; X86-NO-CMOV-NEXT: pushl %ebx +; X86-NO-CMOV-NEXT: pushl %edi +; X86-NO-CMOV-NEXT: pushl %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-CMOV-NEXT: xorl %ecx, %ecx +; X86-NO-CMOV-NEXT: negl %ebp +; X86-NO-CMOV-NEXT: movl $0, %ebx +; X86-NO-CMOV-NEXT: sbbl %edx, %ebx +; X86-NO-CMOV-NEXT: movl $0, %edi +; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NO-CMOV-NEXT: sbbl %esi, %ecx +; X86-NO-CMOV-NEXT: testl %esi, %esi +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NO-CMOV-NEXT: js .LBB4_2 +; X86-NO-CMOV-NEXT: # %bb.1: +; X86-NO-CMOV-NEXT: movl %esi, %ecx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NO-CMOV-NEXT: movl %edx, %ebx +; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NO-CMOV-NEXT: .LBB4_2: +; X86-NO-CMOV-NEXT: movl %ebp, (%eax) +; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-NO-CMOV-NEXT: movl %edi, 8(%eax) +; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax) +; X86-NO-CMOV-NEXT: popl %esi +; X86-NO-CMOV-NEXT: popl %edi +; X86-NO-CMOV-NEXT: popl %ebx +; X86-NO-CMOV-NEXT: popl %ebp +; X86-NO-CMOV-NEXT: retl $4 +; +; X86-CMOV-LABEL: test_i128: +; X86-CMOV: # %bb.0: +; X86-CMOV-NEXT: pushl %ebp +; X86-CMOV-NEXT: pushl %ebx +; X86-CMOV-NEXT: pushl %edi +; X86-CMOV-NEXT: pushl %esi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-CMOV-NEXT: xorl %esi, %esi +; X86-CMOV-NEXT: negl %edi +; X86-CMOV-NEXT: movl $0, %ebx +; X86-CMOV-NEXT: sbbl %edx, %ebx +; X86-CMOV-NEXT: movl $0, %ebp +; X86-CMOV-NEXT: sbbl %ecx, %ebp +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: sbbl %eax, %esi +; X86-CMOV-NEXT: testl %eax, %eax +; X86-CMOV-NEXT: cmovnsl %eax, %esi +; X86-CMOV-NEXT: cmovnsl %ecx, %ebp +; X86-CMOV-NEXT: cmovnsl %edx, %ebx +; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi +; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-CMOV-NEXT: movl %edi, (%eax) +; X86-CMOV-NEXT: movl %ebx, 4(%eax) +; X86-CMOV-NEXT: movl %ebp, 8(%eax) +; X86-CMOV-NEXT: movl %esi, 12(%eax) +; X86-CMOV-NEXT: popl %esi +; X86-CMOV-NEXT: popl %edi +; X86-CMOV-NEXT: popl %ebx +; X86-CMOV-NEXT: popl %ebp +; X86-CMOV-NEXT: retl $4 +; +; X64-LABEL: test_i128: +; X64: # %bb.0: +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: negq %rax +; X64-NEXT: sbbq %rsi, %rdx +; X64-NEXT: testq %rsi, %rsi +; X64-NEXT: cmovnsq %rdi, %rax +; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: retq + %tmp1neg = sub i128 0, %a + %b = icmp sgt i128 %a, -1 + %abs = select i1 %b, i128 %a, i128 %tmp1neg + ret i128 %abs +} + From 01b3e167575412792901c705032e304ef184a75d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 10:59:57 -0700 Subject: [PATCH 016/161] [X86] Use the same sequence for i128 ISD::ABS on 64-bit targets as we use for i64 on 32-bit targets. 
Differential Revision: https://reviews.llvm.org/D87214 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++++++++--- llvm/test/CodeGen/X86/abs.ll | 13 +++++++------ llvm/test/CodeGen/X86/iabs.ll | 13 +++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1395db57b57a0d..ad8704f686c168 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -195,6 +195,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ABS , MVT::i32 , Custom); } setOperationAction(ISD::ABS , MVT::i64 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i128 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -29719,9 +29721,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::ABS: { - assert(N->getValueType(0) == MVT::i64 && + assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) && "Unexpected type (!= i64) on ABS."); - MVT HalfT = MVT::i32; + assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) && + "Unexpected type (!= i128) on ABS."); + MVT VT = N->getSimpleValueType(0); + MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32; SDValue Lo, Hi, Tmp; SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); @@ -29737,7 +29742,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue(Lo.getNode(), 1)); Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi)); return; } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. 
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 345830676abaaa..63faafc10ec8d3 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -132,13 +132,14 @@ define i64 @test_i64(i64 %a) nounwind { define i128 @test_i128(i128 %a) nounwind { ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; X64-NEXT: retq ; ; X86-LABEL: test_i128: diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index d9fc452510c784..f052718d984004 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -191,13 +191,14 @@ define i128 @test_i128(i128 %a) nounwind { ; ; X64-LABEL: test_i128: ; X64: # %bb.0: -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: negq %rax -; X64-NEXT: sbbq %rsi, %rdx -; X64-NEXT: testq %rsi, %rsi -; X64-NEXT: cmovnsq %rdi, %rax -; X64-NEXT: cmovnsq %rsi, %rdx +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: adcq %rcx, %rdx +; X64-NEXT: xorq %rcx, %rax +; X64-NEXT: xorq %rcx, %rdx ; X64-NEXT: retq %tmp1neg = sub i128 0, %a %b = icmp sgt i128 %a, -1 From 70207816e35771459d053ab9faf75a50a4cb92fb Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 15:26:43 -0400 Subject: [PATCH 017/161] [InstCombine] add ptr difference tests; NFC --- llvm/test/Transforms/InstCombine/sub-gep.ll | 56 ++++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index cf9604223f6c15..fcb24eec349a5e 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -14,6 +14,32 @@ define i64 @test_inbounds([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_partial_inbounds1([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds1( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_partial_inbounds2([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_partial_inbounds2( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX:%.*]], 2 +; CHECK-NEXT: ret i64 [[P2_IDX]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p1 to i64 + %i2 = ptrtoint i32* %p2 to i64 + %d = sub i64 %i2, %i1 + ret i64 %d +} + define i64 @test_inbounds_nuw([0 x i32]* %base, i64 %idx) { ; CHECK-LABEL: @test_inbounds_nuw( ; CHECK-NEXT: [[P2_IDX:%.*]] = shl nuw nsw i64 [[IDX:%.*]], 2 @@ -69,13 +95,39 @@ define i64 @test_inbounds_nuw_swapped([0 x i32]* %base, i64 %idx) { ret i64 %d } +define i64 @test_inbounds1_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds1_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: 
ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + +define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { +; CHECK-LABEL: @test_inbounds2_nuw_swapped( +; CHECK-NEXT: [[P2_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 +; CHECK-NEXT: ret i64 [[P2_IDX_NEG]] +; + %p1 = getelementptr [0 x i32], [0 x i32]* %base, i64 0, i64 0 + %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx + %i1 = ptrtoint i32* %p2 to i64 + %i2 = ptrtoint i32* %p1 to i64 + %d = sub nuw i64 %i2, %i1 + ret i64 %d +} + ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 ; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[DOTNEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[DOTNEG]] +; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] +; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 From 8b300679192b317aa91a28e781fcf60d4416b0d6 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Mon, 7 Sep 2020 15:47:57 -0400 Subject: [PATCH 018/161] [InstCombine] improve fold of pointer differences This was supposed to be an NFC cleanup, but there's a real logic difference (did not drop 'nsw') visible in some tests in addition to an efficiency improvement. This is because in the case where we have 2 GEPs, the code was *always* swapping the operands and negating the result. But if we have 2 GEPs, we should *never* need swapping/negation AFAICT. This is part of improving flags propagation noticed with PR47430. --- .../InstCombine/InstCombineAddSub.cpp | 34 +++++-------------- llvm/test/Transforms/InstCombine/sub-gep.ll | 6 ++-- llvm/test/Transforms/InstCombine/sub.ll | 34 +++++++++---------- 3 files changed, 29 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 5cf6eb2a885a60..5ce32bc592d052 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1615,43 +1615,27 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS, // this. bool Swapped = false; GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; + if (!isa(LHS) && isa(RHS)) { + std::swap(LHS, RHS); + Swapped = true; + } - // For now we require one side to be the base pointer "A" or a constant - // GEP derived from it. - if (GEPOperator *LHSGEP = dyn_cast(LHS)) { + // Require at least one GEP with a common base pointer on both sides. + if (auto *LHSGEP = dyn_cast(LHS)) { // (gep X, ...) - X if (LHSGEP->getOperand(0) == RHS) { GEP1 = LHSGEP; - Swapped = false; - } else if (GEPOperator *RHSGEP = dyn_cast(RHS)) { + } else if (auto *RHSGEP = dyn_cast(RHS)) { // (gep X, ...) - (gep X, ...) 
if (LHSGEP->getOperand(0)->stripPointerCasts() == - RHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = RHSGEP; + RHSGEP->getOperand(0)->stripPointerCasts()) { GEP1 = LHSGEP; - Swapped = false; - } - } - } - - if (GEPOperator *RHSGEP = dyn_cast(RHS)) { - // X - (gep X, ...) - if (RHSGEP->getOperand(0) == LHS) { - GEP1 = RHSGEP; - Swapped = true; - } else if (GEPOperator *LHSGEP = dyn_cast(LHS)) { - // (gep X, ...) - (gep X, ...) - if (RHSGEP->getOperand(0)->stripPointerCasts() == - LHSGEP->getOperand(0)->stripPointerCasts()) { - GEP2 = LHSGEP; - GEP1 = RHSGEP; - Swapped = true; + GEP2 = RHSGEP; } } } if (!GEP1) - // No GEP found. return nullptr; if (GEP2) { diff --git a/llvm/test/Transforms/InstCombine/sub-gep.ll b/llvm/test/Transforms/InstCombine/sub-gep.ll index fcb24eec349a5e..f31eeb46d88231 100644 --- a/llvm/test/Transforms/InstCombine/sub-gep.ll +++ b/llvm/test/Transforms/InstCombine/sub-gep.ll @@ -124,10 +124,10 @@ define i64 @test_inbounds2_nuw_swapped([0 x i32]* %base, i64 %idx) { ; The sub and shl here could be nuw, but this is harder to handle. define i64 @test_inbounds_nuw_two_gep([0 x i32]* %base, i64 %idx, i64 %idx2) { ; CHECK-LABEL: @test_inbounds_nuw_two_gep( +; CHECK-NEXT: [[P2_IDX:%.*]] = shl nsw i64 [[IDX2:%.*]], 2 ; CHECK-NEXT: [[P1_IDX_NEG:%.*]] = mul i64 [[IDX:%.*]], -4 -; CHECK-NEXT: [[P2_IDX_NEG_NEG:%.*]] = shl i64 [[IDX2:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[P2_IDX_NEG_NEG]], [[P1_IDX_NEG]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[P1_IDX_NEG]], [[P2_IDX]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %p1 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx %p2 = getelementptr inbounds [0 x i32], [0 x i32]* %base, i64 0, i64 %idx2 diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index dbe1631226d658..437d8f8c5c0231 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -505,9 +505,9 @@ define i64 @test24b(i8* %P, i64 %A){ define i64 @test25(i8* %P, i64 %A){ ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i64 [[A:%.*]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i64 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i64 [[A:%.*]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i64 [[B_IDX]], -84 +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16]* @Arr, i64 0, i64 %A %C = ptrtoint i16* %B to i64 @@ -520,9 +520,9 @@ define i64 @test25(i8* %P, i64 %A){ define i16 @test25_as1(i8 addrspace(1)* %P, i64 %A) { ; CHECK-LABEL: @test25_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A:%.*]] to i16 -; CHECK-NEXT: [[B_IDX_NEG_NEG:%.*]] = shl i16 [[TMP1]], 1 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = add i16 [[B_IDX_NEG_NEG]], -84 -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw i16 [[TMP1]], 1 +; CHECK-NEXT: [[GEPDIFF:%.*]] = add i16 [[B_IDX]], -84 +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %B = getelementptr inbounds [42 x i16], [42 x i16] addrspace(1)* @Arr_as1, i64 0, i64 %A %C = ptrtoint i16 addrspace(1)* %B to i16 @@ -825,8 +825,8 @@ define i32 @test28commuted(i32 %x, i32 %y, i32 %z) { define i64 @test29(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test29( -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[I:%.*]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds i8, i8* %foo, i64 
%i %gep2 = getelementptr inbounds i8, i8* %foo, i64 %j @@ -838,9 +838,9 @@ define i64 @test29(i8* %foo, i64 %i, i64 %j) { define i64 @test30(i8* %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test30( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i64 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %bit = bitcast i8* %foo to i32* %gep1 = getelementptr inbounds i32, i32* %bit, i64 %i @@ -853,9 +853,9 @@ define i64 @test30(i8* %foo, i64 %i, i64 %j) { define i16 @test30_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; CHECK-LABEL: @test30_as1( -; CHECK-NEXT: [[GEP1_IDX_NEG_NEG:%.*]] = shl i16 [[I:%.*]], 2 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i16 [[GEP1_IDX_NEG_NEG]], [[J:%.*]] -; CHECK-NEXT: ret i16 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i16 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i16 [[GEPDIFF]] ; %bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)* %gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i @@ -1234,10 +1234,10 @@ define i64 @test58([100 x [100 x i8]]* %foo, i64 %i, i64 %j) { ; "%sub = i64 %i, %j, ret i64 %sub" ; gep1 and gep2 have only one use ; CHECK-LABEL: @test58( -; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 ; CHECK-NEXT: [[GEP1_OFFS:%.*]] = add i64 [[I:%.*]], 4200 -; CHECK-NEXT: [[GEPDIFF_NEG:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] -; CHECK-NEXT: ret i64 [[GEPDIFF_NEG]] +; CHECK-NEXT: [[GEP2_OFFS:%.*]] = add i64 [[J:%.*]], 4200 +; CHECK-NEXT: [[GEPDIFF:%.*]] = sub i64 [[GEP1_OFFS]], [[GEP2_OFFS]] +; CHECK-NEXT: ret i64 [[GEPDIFF]] ; %gep1 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %i %gep2 = getelementptr inbounds [100 x [100 x i8]], [100 x [100 x i8]]* %foo, i64 0, i64 42, i64 %j From da79b1eecc65171f6ca0cda9b4f1970bd1503c17 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 12:23:15 -0700 Subject: [PATCH 019/161] [SelectionDAG][X86][ARM] Teach ExpandIntRes_ABS to use sra+add+xor expansion when ADDCARRY is supported. Rather than using SELECT instructions, use SRA, UADDO/ADDCARRY and XORs to expand ABS. This is the multi-part version of the sequence we use in LegalizeDAG. It's also the same as the Custom sequence uses for i64 on 32-bit and i128 on 64-bit. So we can remove the X86 customization. 
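Purely as an illustration of that expansion (hand-written for this note, not code from the patch), the sra+add+xor idea for a 128-bit value split into two 64-bit halves can be sketched in C++ as follows; the U128 struct and the abs128 name are invented for the example, and it assumes the usual arithmetic behaviour of a signed right shift:

    #include <cstdint>

    struct U128 { uint64_t Lo, Hi; };

    U128 abs128(U128 X) {
      // SRA step: an all-ones mask if the value is negative, all zeros otherwise.
      uint64_t Sign = (uint64_t)((int64_t)X.Hi >> 63);
      // UADDO/ADDCARRY steps: add the mask to both halves, propagating the carry.
      uint64_t Lo = X.Lo + Sign;
      uint64_t Carry = Lo < X.Lo; // unsigned carry out of the low half
      uint64_t Hi = X.Hi + Sign + Carry;
      // XOR steps: flipping both halves with the mask completes the negation,
      // since (x - 1) ^ ~0 == -x; non-negative values pass through unchanged.
      return {Lo ^ Sign, Hi ^ Sign};
    }

This is the same shape visible in the updated abs.ll/iabs.ll checks below, where the old neg/sbb/cmov sequences become sar plus add/adc plus xor.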
Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D87215 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 28 +++++- llvm/lib/Target/X86/X86ISelLowering.cpp | 30 +----- llvm/test/CodeGen/Thumb2/mve-abs.ll | 35 +++---- llvm/test/CodeGen/X86/abs.ll | 38 ++++---- llvm/test/CodeGen/X86/iabs.ll | 95 ++++++------------- 5 files changed, 85 insertions(+), 141 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 77a79a0479ef76..e1881c20e5b3b5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2789,16 +2789,38 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); + SDValue N0 = N->getOperand(0); + GetExpandedInteger(N0, Lo, Hi); + EVT NVT = Lo.getValueType(); + + // If we have ADDCARRY, use the expanded form of the sra+add+xor sequence we + // use in LegalizeDAG. The ADD part of the expansion is based on + // ExpandIntRes_ADDSUB which also uses ADDCARRY/UADDO after checking that + // ADDCARRY is LegalOrCustom. Each of the pieces here can be further expanded + // if needed. Shift expansion has a special case for filling with sign bits + // so that we will only end up with one SRA. + bool HasAddCarry = TLI.isOperationLegalOrCustom( + ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (HasAddCarry) { + EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + SDValue Sign = + DAG.getNode(ISD::SRA, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); + SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT)); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Lo, Sign); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Hi, Sign, Lo.getValue(1)); + Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign); + Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign); + return; + } + // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo) EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), N0); SDValue NegLo, NegHi; SplitInteger(Neg, NegLo, NegHi); - GetExpandedInteger(N0, Lo, Hi); - EVT NVT = Lo.getValueType(); SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT), DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT); Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ad8704f686c168..2c7c36325f1469 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -193,10 +193,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); setOperationAction(ISD::ABS , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i64 , Custom); } - setOperationAction(ISD::ABS , MVT::i64 , Custom); - if (Subtarget.is64Bit()) - setOperationAction(ISD::ABS , MVT::i128 , Custom); // Funnel shifts. 
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -29720,31 +29719,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } - case ISD::ABS: { - assert((Subtarget.is64Bit() || N->getValueType(0) == MVT::i64) && - "Unexpected type (!= i64) on ABS."); - assert((!Subtarget.is64Bit() || N->getValueType(0) == MVT::i128) && - "Unexpected type (!= i128) on ABS."); - MVT VT = N->getSimpleValueType(0); - MVT HalfT = VT == MVT::i128 ? MVT::i64 : MVT::i32; - SDValue Lo, Hi, Tmp; - SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); - - Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(0, dl, HalfT)); - Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), - DAG.getConstant(1, dl, HalfT)); - Tmp = DAG.getNode( - ISD::SRA, dl, HalfT, Hi, - DAG.getShiftAmountConstant(HalfT.getSizeInBits() - 1, HalfT, dl)); - Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, - SDValue(Lo.getNode(), 1)); - Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); - Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi)); - return; - } // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32. case X86ISD::FMINC: case X86ISD::FMIN: diff --git a/llvm/test/CodeGen/Thumb2/mve-abs.ll b/llvm/test/CodeGen/Thumb2/mve-abs.ll index 0b5dcbced1a562..8a9b8814ef2ec8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-abs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -40,33 +40,24 @@ entry: define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) { ; CHECK-LABEL: abs_v2i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r1, s0 -; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: rsbs.w lr, r1, #0 -; CHECK-NEXT: sbc.w r2, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, lr, r1, ne -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: vmov.32 q1[0], r1 -; CHECK-NEXT: vmov r1, s2 -; CHECK-NEXT: vmov.32 q1[1], r0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: adc.w r2, r0, r0, asr #31 +; CHECK-NEXT: eor.w r2, r2, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 +; CHECK-NEXT: vmov.32 q1[0], r0 ; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: rsbs r2, r1, #0 -; CHECK-NEXT: sbc.w r12, r12, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: cset r3, mi -; CHECK-NEXT: ands r3, r3, #1 -; CHECK-NEXT: csel r1, r2, r1, ne -; CHECK-NEXT: csel r0, r12, r0, ne +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov.32 q1[1], r2 +; CHECK-NEXT: adds.w r1, r1, r0, asr #31 +; CHECK-NEXT: eor.w r1, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[2], r1 +; CHECK-NEXT: adc.w r1, r0, r0, asr #31 +; CHECK-NEXT: eor.w r0, r1, r0, asr #31 ; CHECK-NEXT: vmov.32 q1[3], r0 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: bx lr entry: %0 = icmp slt <2 x i64> %s1, zeroinitializer %1 = sub nsw <2 x i64> zeroinitializer, %s1 diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 63faafc10ec8d3..8e20b001cc3e87 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -144,35 +144,31 @@ define i128 @test_i128(i128 %a) nounwind { ; ; X86-LABEL: test_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, 
%edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: negl %edi -; X86-NEXT: movl $0, %ebx -; X86-NEXT: sbbl %edx, %ebx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: testl %eax, %eax -; X86-NEXT: cmovnsl %eax, %esi -; X86-NEXT: cmovnsl %ecx, %ebp -; X86-NEXT: cmovnsl %edx, %ebx -; X86-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, (%eax) -; X86-NEXT: movl %ebx, 4(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %esi, 12(%eax) +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 %r = call i128 @llvm.abs.i128(i128 %a, i1 false) ret i128 %r diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index f052718d984004..319eb6f5edc328 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -121,73 +121,34 @@ define i64 @test_i64(i64 %a) nounwind { } define i128 @test_i128(i128 %a) nounwind { -; X86-NO-CMOV-LABEL: test_i128: -; X86-NO-CMOV: # %bb.0: -; X86-NO-CMOV-NEXT: pushl %ebp -; X86-NO-CMOV-NEXT: pushl %ebx -; X86-NO-CMOV-NEXT: pushl %edi -; X86-NO-CMOV-NEXT: pushl %esi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NO-CMOV-NEXT: xorl %ecx, %ecx -; X86-NO-CMOV-NEXT: negl %ebp -; X86-NO-CMOV-NEXT: movl $0, %ebx -; X86-NO-CMOV-NEXT: sbbl %edx, %ebx -; X86-NO-CMOV-NEXT: movl $0, %edi -; X86-NO-CMOV-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NO-CMOV-NEXT: sbbl %esi, %ecx -; X86-NO-CMOV-NEXT: testl %esi, %esi -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NO-CMOV-NEXT: js .LBB4_2 -; X86-NO-CMOV-NEXT: # %bb.1: -; X86-NO-CMOV-NEXT: movl %esi, %ecx -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NO-CMOV-NEXT: movl %edx, %ebx -; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NO-CMOV-NEXT: .LBB4_2: -; X86-NO-CMOV-NEXT: movl %ebp, (%eax) -; X86-NO-CMOV-NEXT: movl %ebx, 4(%eax) -; X86-NO-CMOV-NEXT: movl %edi, 8(%eax) -; X86-NO-CMOV-NEXT: movl %ecx, 12(%eax) -; X86-NO-CMOV-NEXT: popl %esi -; X86-NO-CMOV-NEXT: popl %edi -; X86-NO-CMOV-NEXT: popl %ebx -; X86-NO-CMOV-NEXT: popl %ebp -; X86-NO-CMOV-NEXT: retl $4 -; -; X86-CMOV-LABEL: test_i128: -; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebp -; X86-CMOV-NEXT: pushl %ebx -; X86-CMOV-NEXT: pushl %edi -; X86-CMOV-NEXT: pushl %esi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-CMOV-NEXT: xorl %esi, %esi -; X86-CMOV-NEXT: negl %edi -; X86-CMOV-NEXT: movl $0, %ebx -; X86-CMOV-NEXT: sbbl %edx, %ebx -; X86-CMOV-NEXT: movl $0, %ebp -; X86-CMOV-NEXT: sbbl %ecx, %ebp -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: sbbl %eax, %esi -; X86-CMOV-NEXT: testl %eax, %eax -; X86-CMOV-NEXT: cmovnsl %eax, %esi -; X86-CMOV-NEXT: cmovnsl %ecx, %ebp -; 
X86-CMOV-NEXT: cmovnsl %edx, %ebx -; X86-CMOV-NEXT: cmovnsl {{[0-9]+}}(%esp), %edi -; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl %edi, (%eax) -; X86-CMOV-NEXT: movl %ebx, 4(%eax) -; X86-CMOV-NEXT: movl %ebp, 8(%eax) -; X86-CMOV-NEXT: movl %esi, 12(%eax) -; X86-CMOV-NEXT: popl %esi -; X86-CMOV-NEXT: popl %edi -; X86-CMOV-NEXT: popl %ebx -; X86-CMOV-NEXT: popl %ebp -; X86-CMOV-NEXT: retl $4 +; X86-LABEL: test_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: sarl $31, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: addl %edx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: adcl %edx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: adcl %edx, %ebx +; X86-NEXT: adcl %edx, %ecx +; X86-NEXT: xorl %edx, %ecx +; X86-NEXT: xorl %edx, %ebx +; X86-NEXT: xorl %edx, %edi +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: movl %esi, (%eax) +; X86-NEXT: movl %edi, 4(%eax) +; X86-NEXT: movl %ebx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl $4 ; ; X64-LABEL: test_i128: ; X64: # %bb.0: From 9fb46a452d4e5666828c95610ceac8dcd9e4ce16 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 10:27:23 +0200 Subject: [PATCH 020/161] [SCCP] Compute ranges for supported intrinsics For intrinsics supported by ConstantRange, compute the result range based on the argument ranges. We do this independently of whether some or all of the input ranges are full, as we can often still constrain the result in some way. Differential Revision: https://reviews.llvm.org/D87183 --- llvm/lib/Transforms/Scalar/SCCP.cpp | 19 +++++++++++++++++++ llvm/test/Transforms/SCCP/intrinsics.ll | 18 ++++++------------ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 2afc778ed82141..33ab2907906e05 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1350,6 +1350,25 @@ void SCCPSolver::handleCallResult(CallBase &CB) { return (void)mergeInValue(IV, &CB, CopyOfVal); } + + if (ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) { + // Compute result range for intrinsics supported by ConstantRange. + // Do this even if we don't know a range for all operands, as we may + // still know something about the result range, e.g. of abs(x). 
+ SmallVector OpRanges; + for (Value *Op : II->args()) { + const ValueLatticeElement &State = getValueState(Op); + if (State.isConstantRange()) + OpRanges.push_back(State.getConstantRange()); + else + OpRanges.push_back( + ConstantRange::getFull(Op->getType()->getScalarSizeInBits())); + } + + ConstantRange Result = + ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges); + return (void)mergeInValue(II, ValueLatticeElement::getRange(Result)); + } } // The common case is that we aren't tracking the callee, either because we diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll index d06b94162b5be4..e261a59d3d6bcb 100644 --- a/llvm/test/Transforms/SCCP/intrinsics.ll +++ b/llvm/test/Transforms/SCCP/intrinsics.ll @@ -12,10 +12,8 @@ define void @abs1(i8* %p) { ; CHECK-LABEL: @abs1( ; CHECK-NEXT: [[X:%.*]] = load i8, i8* [[P:%.*]], align 1, [[RNG0:!range !.*]] ; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X]], i1 false) -; CHECK-NEXT: [[CMP1:%.*]] = icmp sge i8 [[ABS]], 0 -; CHECK-NEXT: call void @use(i1 [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i8 [[ABS]], 10 -; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[CMP3:%.*]] = icmp sge i8 [[ABS]], 1 ; CHECK-NEXT: call void @use(i1 [[CMP3]]) ; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i8 [[ABS]], 9 @@ -40,8 +38,7 @@ define void @abs1(i8* %p) { define void @abs2(i8 %x) { ; CHECK-LABEL: @abs2( ; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 true) -; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[ABS]], 0 -; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) @@ -68,10 +65,8 @@ define void @umax1(i8* %p1, i8* %p2) { ; CHECK-NEXT: [[X1:%.*]] = load i8, i8* [[P1:%.*]], align 1, [[RNG1:!range !.*]] ; CHECK-NEXT: [[X2:%.*]] = load i8, i8* [[P2:%.*]], align 1, [[RNG2:!range !.*]] ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X1]], i8 [[X2]]) -; CHECK-NEXT: [[CMP1:%.*]] = icmp uge i8 [[M]], 5 -; CHECK-NEXT: call void @use(i1 [[CMP1]]) -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[M]], 15 -; CHECK-NEXT: call void @use(i1 [[CMP2]]) +; CHECK-NEXT: call void @use(i1 true) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: [[CMP3:%.*]] = icmp uge i8 [[M]], 6 ; CHECK-NEXT: call void @use(i1 [[CMP3]]) ; CHECK-NEXT: [[CMP4:%.*]] = icmp ult i8 [[M]], 14 @@ -95,8 +90,7 @@ define void @umax1(i8* %p1, i8* %p2) { define void @umax2(i8 %x) { ; CHECK-LABEL: @umax2( ; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 10) -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 10 -; CHECK-NEXT: call void @use(i1 [[CMP]]) +; CHECK-NEXT: call void @use(i1 true) ; CHECK-NEXT: ret void ; %m = call i8 @llvm.umax.i8(i8 %x, i8 10) From ddab4cd83ea31141aaada424dccf94278482ee88 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 7 Sep 2020 21:07:02 +0200 Subject: [PATCH 021/161] [KnownBits] Avoid some copies (NFC) These lambdas don't need copies, use const reference. 
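A minimal, self-contained C++ sketch of the difference (using a made-up Bits struct rather than the real KnownBits type):

    #include <cstdint>

    struct Bits { uint64_t Zero, One; };

    // By value: every invocation copies the argument.
    auto FlipByValue = [](Bits Val) { return Bits{Val.One, Val.Zero}; };

    // By const reference: the same body, but no copy per call.
    auto FlipByConstRef = [](const Bits &Val) { return Bits{Val.One, Val.Zero}; };

For KnownBits the saving is more than two words, since its members are APInt values that may own heap storage for wide integers.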
--- llvm/lib/Support/KnownBits.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index aad50e1240341d..03843687c10a49 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -115,13 +115,13 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [0, 0xFFFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { return KnownBits(Val.One, Val.Zero); }; + auto Flip = [](const KnownBits &Val) { return KnownBits(Val.One, Val.Zero); }; return Flip(umax(Flip(LHS), Flip(RHS))); } KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0, 0xFFFFFFFF] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.Zero; APInt One = Val.One; @@ -134,7 +134,7 @@ KnownBits KnownBits::smax(const KnownBits &LHS, const KnownBits &RHS) { KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) { // Flip the range of values: [-0x80000000, 0x7FFFFFFF] <-> [0xFFFFFFFF, 0] - auto Flip = [](KnownBits Val) { + auto Flip = [](const KnownBits &Val) { unsigned SignBitPosition = Val.getBitWidth() - 1; APInt Zero = Val.One; APInt One = Val.Zero; From bb7d3af1139c36270bc9948605e06f40e4c51541 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Mon, 7 Sep 2020 23:54:06 +0300 Subject: [PATCH 022/161] Reland [SimplifyCFG][LoopRotate] SimplifyCFG: disable common instruction hoisting by default, enable late in pipeline This was reverted in 503deec2183d466dad64b763bab4e15fd8804239 because it caused gigantic increase (3x) in branch mispredictions in certain benchmarks on certain CPU's, see https://reviews.llvm.org/D84108#2227365. It has since been investigated and here are the results: https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20200907/827578.html > It's an amazingly severe regression, but it's also all due to branch > mispredicts (about 3x without this). The code layout looks ok so there's > probably something else to deal with. I'm not sure there's anything we can > reasonably do so we'll just have to take the hit for now and wait for > another code reorganization to make the branch predictor a bit more happy :) > > Thanks for giving us some time to investigate and feel free to recommit > whenever you'd like. > > -eric So let's just reland this. Original commit message: I've been looking at missed vectorizations in one codebase. One particular thing that stands out is that some of the loops reach vectorizer in a rather mangled form, with weird PHI's, and some of the loops aren't even in a rotated form. After taking a more detailed look, that happened because the loop's headers were too big by then. It is evident that SimplifyCFG's common code hoisting transform is at fault there, because the pattern it handles is precisely the unrotated loop basic block structure. Surprizingly, `SimplifyCFGOpt::HoistThenElseCodeToIf()` is enabled by default, and is always run, unlike it's friend, common code sinking transform, `SinkCommonCodeFromPredecessors()`, which is not enabled by default and is only run once very late in the pipeline. I'm proposing to harmonize this, and disable common code hoisting until //late// in pipeline. 
Definition of //late// may vary, here currently i've picked the same one as for code sinking, but i suppose we could enable it as soon as right after loop rotation happens. Experimentation shows that this does indeed unsurprizingly help, more loops got rotated, although other issues remain elsewhere. Now, this undoubtedly seriously shakes phase ordering. This will undoubtedly be a mixed bag in terms of both compile- and run- time performance, codesize. Since we no longer aggressively hoist+deduplicate common code, we don't pay the price of said hoisting (which wasn't big). That may allow more loops to be rotated, so we pay that price. That, in turn, that may enable all the transforms that require canonical (rotated) loop form, including but not limited to vectorization, so we pay that too. And in general, no deduplication means more [duplicate] instructions going through the optimizations. But there's still late hoisting, some of them will be caught late. As per benchmarks i've run {F12360204}, this is mostly within the noise, there are some small improvements, some small regressions. One big regression i saw i fixed in rG8d487668d09fb0e4e54f36207f07c1480ffabbfd, but i'm sure this will expose many more pre-existing missed optimizations, as usual :S llvm-compile-time-tracker.com thoughts on this: http://llvm-compile-time-tracker.com/compare.php?from=e40315d2b4ed1e38962a8f33ff151693ed4ada63&to=c8289c0ecbf235da9fb0e3bc052e3c0d6bff5cf9&stat=instructions * this does regress compile-time by +0.5% geomean (unsurprizingly) * size impact varies; for ThinLTO it's actually an improvement The largest fallout appears to be in GVN's load partial redundancy elimination, it spends *much* more time in `MemoryDependenceResults::getNonLocalPointerDependency()`. Non-local `MemoryDependenceResults` is widely-known to be, uh, costly. There does not appear to be a proper solution to this issue, other than silencing the compile-time performance regression by tuning cut-off thresholds in `MemoryDependenceResults`, at the cost of potentially regressing run-time performance. D84609 attempts to move in that direction, but the path is unclear and is going to take some time. If we look at stats before/after diffs, some excerpts: * RawSpeed (the target) {F12360200} * -14 (-73.68%) loops not rotated due to the header size (yay) * -272 (-0.67%) `"Number of live out of a loop variables"` - good for vectorizer * -3937 (-64.19%) common instructions hoisted * +561 (+0.06%) x86 asm instructions * -2 basic blocks * +2418 (+0.11%) IR instructions * vanilla test-suite + RawSpeed + darktable {F12360201} * -36396 (-65.29%) common instructions hoisted * +1676 (+0.02%) x86 asm instructions * +662 (+0.06%) basic blocks * +4395 (+0.04%) IR instructions It is likely to be sub-optimal for when optimizing for code size, so one might want to change tune pipeline by enabling sinking/hoisting when optimizing for size. Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D84108 This reverts commit 503deec2183d466dad64b763bab4e15fd8804239. 
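To make the deferred transform concrete, here is a hand-written schematic in C++ (it is not the source the test below was generated from, and the function names only mirror the f0/f1/f2 helpers used there): both arms of the header's conditional branch begin with the same call, so SimplifyCFG's hoisting folds that call into the loop header, the header grows past the rotation threshold, and LoopRotate leaves the loop unrotated.

    void f0();
    void f1();
    void f2();

    void loop(int width) {
      if (width < 1)
        return;
      for (int i = 0; /* header ends in a conditional branch */; ++i) {
        if (i >= width - 1) { // exit arm
          f0();               // identical leading call...
          f2();
          return;
        }
        f0();                 // ...on the stay-in-loop arm as well
        f1();
      }
    }

With hoisting disabled until late in the pipeline, the duplicated f0() calls survive into LoopRotate, which is what the ROTATED_LATER/ROTATE check prefixes below exercise.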
--- .../Transforms/Utils/SimplifyCFGOptions.h | 2 +- llvm/lib/Passes/PassBuilder.cpp | 13 +++++---- .../Target/AArch64/AArch64TargetMachine.cpp | 1 + llvm/lib/Target/ARM/ARMTargetMachine.cpp | 3 +- .../Target/Hexagon/HexagonTargetMachine.cpp | 1 + .../lib/Transforms/IPO/PassManagerBuilder.cpp | 3 ++ .../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 4 +-- llvm/test/Transforms/PGOProfile/chr.ll | 7 +++++ .../loop-rotation-vs-common-code-hoisting.ll | 29 +++++++++---------- .../SimplifyCFG/common-code-hoisting.ll | 2 +- 10 files changed, 39 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h index 46f6ca0462f8b2..fb3a7490346f40 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -25,7 +25,7 @@ struct SimplifyCFGOptions { bool ForwardSwitchCondToPhi = false; bool ConvertSwitchToLookupTable = false; bool NeedCanonicalLoop = true; - bool HoistCommonInsts = true; + bool HoistCommonInsts = false; bool SinkCommonInsts = false; bool SimplifyCondBranch = true; bool FoldTwoEntryPHINode = true; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 9df6a985789eaa..9a2e895d7b7176 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1160,11 +1160,14 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions(). - forwardSwitchCondToPhi(true). - convertSwitchToLookupTable(true). - needCanonicalLoops(false). - sinkCommonInsts(true))); + // FIXME: study whether hoisting and/or sinking of common instructions should + // be delayed until after SLP vectorizer. + OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); // Optimize parallel scalar instruction chains into SIMD instructions. if (PTO.SLPVectorization) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 8b15898c1c1408..d7a14a3dc77281 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -455,6 +455,7 @@ void AArch64PassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); // Run LoopDataPrefetch diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 55ac332e2c6a61..5068f9b5a0f46e 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -407,7 +407,8 @@ void ARMPassConfig::addIRPasses() { // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( - SimplifyCFGOptions().sinkCommonInsts(true), [this](const Function &F) { + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), + [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 6728306db3d571..37cf391c99838c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -327,6 +327,7 @@ void HexagonPassConfig::addIRPasses() { .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 326d1ab28b60a2..caa9a98ecb0749 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -784,10 +784,13 @@ void PassManagerBuilder::populateModulePassManager( // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. + // FIXME: study whether hoisting and/or sinking of common instructions should + // be delayed until after SLP vectorizer. MPM.add(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) + .hoistCommonInsts(true) .sinkCommonInsts(true))); if (SLPVectorize) { diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index db5211df397a87..b0435bf6e4eac6 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -63,8 +63,8 @@ static cl::opt UserForwardSwitchCond( cl::desc("Forward switch condition to phi ops (default = false)")); static cl::opt UserHoistCommonInsts( - "hoist-common-insts", cl::Hidden, cl::init(true), - cl::desc("hoist common instructions (default = true)")); + "hoist-common-insts", cl::Hidden, cl::init(false), + cl::desc("hoist common instructions (default = false)")); static cl::opt UserSinkCommonInsts( "sink-common-insts", cl::Hidden, cl::init(false), diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index c2e1ae4f53a0f8..1a22d7f0b84983 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -2006,9 +2006,16 @@ define i64 @test_chr_22(i1 %i, i64* %j, i64 %v0) !prof !14 { ; CHECK-NEXT: bb0: ; CHECK-NEXT: [[REASS_ADD:%.*]] = shl i64 [[V0:%.*]], 1 ; CHECK-NEXT: [[V2:%.*]] = add i64 [[REASS_ADD]], 3 +; CHECK-NEXT: [[C1:%.*]] = icmp slt i64 [[V2]], 100 +; CHECK-NEXT: br i1 [[C1]], label [[BB0_SPLIT:%.*]], label [[BB0_SPLIT_NONCHR:%.*]], !prof !15 +; CHECK: bb0.split: ; CHECK-NEXT: [[V299:%.*]] = mul i64 [[V2]], 7860086430977039991 ; CHECK-NEXT: store i64 [[V299]], i64* [[J:%.*]], align 4 ; CHECK-NEXT: ret i64 99 +; CHECK: bb0.split.nonchr: +; CHECK-NEXT: [[V299_NONCHR:%.*]] = mul i64 [[V2]], 7860086430977039991 +; CHECK-NEXT: store i64 [[V299_NONCHR]], i64* [[J]], align 4 +; CHECK-NEXT: ret i64 99 ; bb0: %v1 = add i64 %v0, 3 diff --git 
a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll index 1d8cce6879e9d6..314af1c1414548 100644 --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -5,14 +5,11 @@ ; RUN: opt -O3 -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK2 ; RUN: opt -passes='default' -rotation-max-header-size=1 -S < %s | FileCheck %s --check-prefixes=HOIST,THR1,FALLBACK3 -; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK4 -; RUN: opt -passes='default' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=HOIST,THR2,FALLBACK5 +; RUN: opt -O3 -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK4 +; RUN: opt -passes='default' -rotation-max-header-size=2 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK5 -; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_OLDPM,FALLBACK6 -; RUN: opt -passes='default' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATED_LATER,ROTATED_LATER_NEWPM,FALLBACK7 - -; RUN: opt -O3 -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK8 -; RUN: opt -passes='default' -rotation-max-header-size=4 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK9 +; RUN: opt -O3 -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_OLDPM,FALLBACK6 +; RUN: opt -passes='default' -rotation-max-header-size=3 -S < %s | FileCheck %s --check-prefixes=ROTATE,ROTATE_NEWPM,FALLBACK7 ; This example is produced from a very basic C code: ; @@ -61,8 +58,8 @@ define void @_Z4loopi(i32 %width) { ; HOIST-NEXT: br label [[FOR_COND:%.*]] ; HOIST: for.cond: ; HOIST-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]] +; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; HOIST: for.cond.cleanup: ; HOIST-NEXT: tail call void @f2() @@ -80,17 +77,17 @@ define void @_Z4loopi(i32 %width) { ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: ; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 ; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATED_LATER_OLDPM: for.cond.cleanup: +; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() ; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_OLDPM: for.body: ; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] +; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() ; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; 
ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_OLDPM: return: @@ -102,19 +99,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT3:%.*]] = icmp eq i32 [[TMP0]], 0 ; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader.for.body_crit_edge: ; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1 ; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATED_LATER_NEWPM: for.cond.cleanup: +; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: ; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_COND_PREHEADER_FOR_BODY_CRIT_EDGE]] ] -; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() +; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() ; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] ; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge: @@ -129,19 +126,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_OLDPM: for.cond.preheader: ; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_OLDPM: for.body.preheader: ; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_OLDPM: for.cond.cleanup: +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f2() ; ROTATE_OLDPM-NEXT: br label [[RETURN]] ; ROTATE_OLDPM: for.body: ; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f1() ; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] ; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_OLDPM: return: @@ -153,19 +150,19 @@ define void @_Z4loopi(i32 %width) { ; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_NEWPM: for.cond.preheader: ; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: +; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call 
void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: ; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] -; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: tail call void @f0() +; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] ; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; ROTATE_NEWPM: for.body.for.body_crit_edge: diff --git a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll index b58017ba7ef0b9..37cbc4640e4153 100644 --- a/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll +++ b/llvm/test/Transforms/SimplifyCFG/common-code-hoisting.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg -hoist-common-insts=1 -S < %s | FileCheck %s --check-prefixes=HOIST ; RUN: opt -simplifycfg -hoist-common-insts=0 -S < %s | FileCheck %s --check-prefixes=NOHOIST -; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=HOIST,DEFAULT +; RUN: opt -simplifycfg -S < %s | FileCheck %s --check-prefixes=NOHOIST,DEFAULT ; This example is produced from a very basic C code: ; From 5f5a0bb0872a9673bad08b38bc0b14c42263902a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Sep 2020 14:44:53 -0700 Subject: [PATCH 023/161] [asan][test] Use --image-base for Linux/asan_prelink_test.cpp if ld is LLD LLD supports -Ttext but with the option there is still a PT_LOAD at address zero and thus the Linux kernel will map it to a different address and the test will fail. Use --image-base instead. --- .../test/asan/TestCases/Linux/asan_prelink_test.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp index e00c215e92b117..9c70b61291b36d 100644 --- a/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp +++ b/compiler-rt/test/asan/TestCases/Linux/asan_prelink_test.cpp @@ -1,11 +1,12 @@ // Test if asan works with prelink. -// It does not actually use prelink, but relies on ld's flag -Ttext-segment -// or gold's flag -Ttext (we try the first flag first, if that fails we +// It does not actually use prelink, but relies on GNU ld's -Ttext-segment, +// LLD's --image-base, or gold's -Ttext (we try the first flag first, if that fails we // try the second flag). // // RUN: %clangxx_asan -c %s -o %t.o // RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext-segment=0x3600000000 ||\ -// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000 +// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,--image-base=0x3600000000 ||\ +// RUN: %clangxx_asan -DBUILD_SO=1 -fPIC -shared %s -o %t.so -Wl,-Ttext=0x3600000000 // RUN: %clangxx_asan %t.o %t.so -Wl,-R. -o %t // RUN: %env_asan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s From efb8e156daa120a25f993b3142ef8d6ef766df5a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 7 Sep 2020 22:52:10 +0100 Subject: [PATCH 024/161] [DSE,MemorySSA] Add an early check for read clobbers to traversal. 
Depending on the benchmark, this early exit can save a substantial amount of compile-time: http://llvm-compile-time-tracker.com/compare.php?from=505f2d817aa8e07ba98e5fd4a8f6ff0666f89df1&to=eb4e441147f9b4b7a5fcbbc57428cadbe9e01f10&stat=instructions --- .../Scalar/DeadStoreElimination.cpp | 12 ++++ .../MSSA/read-clobber-after-overwrite.ll | 58 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 109e15d6d7cfc5..49e811b298a605 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1901,6 +1901,18 @@ struct DSEState { return None; } + // Quick check if there are direct uses that are read-clobbers. + if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) { + if (auto *UseOrDef = dyn_cast(U.getUser())) + return !MSSA.dominates(StartAccess, UseOrDef) && + isReadClobber(DefLoc, UseOrDef->getMemoryInst()); + return false; + })) { + Cache.KnownReads.insert(Current); + LLVM_DEBUG(dbgs() << " ... found a read clobber\n"); + return None; + } + // If Current cannot be analyzed or is not removable, check the next // candidate. if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI)) { diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll new file mode 100644 index 00000000000000..4f704c35a90b10 --- /dev/null +++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/read-clobber-after-overwrite.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -dse -enable-dse-memoryssa -S %s | FileCheck %s + +declare i1 @cond() readnone + +define i32 @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[M0:%.*]] = alloca [4 x i32], align 16 +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[LOOP_1]] ], [ [[IV_NEXT:%.*]], [[LOOP_2]] ] +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 3, i64 [[IV]] +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 [[IV]] +; CHECK-NEXT: store i32 20, i32* [[PTR_2]], align 4 +; CHECK-NEXT: store i32 30, i32* [[PTR_1]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_1_LATCH:%.*]], label [[LOOP_2]] +; CHECK: loop.1.latch: +; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_1]] +; CHECK: exit: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* [[M0]], i64 0, i64 1 +; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR_3]], align 16 +; CHECK-NEXT: ret i32 [[LV]] +; +entry: + %M0 = alloca [4 x i32], align 16 + br label %loop.1 + +loop.1: + br label %loop.2 + +loop.2: + %iv = phi i64 [ 0, %loop.1 ], [ %iv.next, %loop.2 ] + %ptr.1 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 3, i64 %iv + store i32 10, i32* %ptr.1, align 4 + %ptr.2 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 %iv + store i32 20, i32* %ptr.2, align 4 + store i32 30, i32* %ptr.1, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %c.3 = call i1 
@cond() + br i1 %c.3, label %loop.1.latch, label %loop.2 + +loop.1.latch: + %c.2 = call i1 @cond() + br i1 %c.2, label %exit, label %loop.1 + +exit: + %ptr.3 = getelementptr inbounds [4 x i32], [4 x i32]* %M0, i64 0, i64 1 + %lv = load i32, i32* %ptr.3, align 16 + ret i32 %lv + + +} From 3e782bf8090c80e6d75e62cd52c9ed32715cbcdd Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Fri, 21 Aug 2020 13:42:20 -0700 Subject: [PATCH 025/161] [Sema][MSVC] warn at dynamic_cast when /GR- is given Differential Revision: https://reviews.llvm.org/D86369 --- clang/include/clang/Basic/DiagnosticGroups.td | 2 ++ .../clang/Basic/DiagnosticSemaKinds.td | 6 ++++++ clang/lib/Sema/SemaCast.cpp | 12 +++++++++++ clang/lib/Sema/SemaExprCXX.cpp | 6 ++++++ clang/test/SemaCXX/ms_no_dynamic_cast.cpp | 21 +++++++++++++++++++ clang/test/SemaCXX/no_dynamic_cast.cpp | 21 +++++++++++++++++++ 6 files changed, 68 insertions(+) create mode 100644 clang/test/SemaCXX/ms_no_dynamic_cast.cpp create mode 100644 clang/test/SemaCXX/no_dynamic_cast.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6b4dcc850612ed..a9bd52b8afcdfd 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1235,3 +1235,5 @@ in addition with the pragmas or -fmax-tokens flag to get any warnings. } def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">; + +def RTTI : DiagGroup<"rtti">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d856f784e0eeae..e1601da74b735a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7438,6 +7438,12 @@ def err_no_typeid_with_fno_rtti : Error< "use of typeid requires -frtti">; def err_no_dynamic_cast_with_fno_rtti : Error< "use of dynamic_cast requires -frtti">; +def warn_no_dynamic_cast_with_rtti_disabled: Warning< + "dynamic_cast will not work since RTTI data is disabled by " + "%select{-fno-rtti-data|/GR-}0">, InGroup; +def warn_no_typeid_with_rtti_disabled: Warning< + "typeid will not work since RTTI data is disabled by " + "%select{-fno-rtti-data|/GR-}0">, InGroup; def err_cannot_form_pointer_to_member_of_reference_type : Error< "cannot form a pointer-to-member to member %0 of reference type %1">; diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 726900c59f20e4..b213fb756a6503 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -890,6 +890,18 @@ void CastOperation::CheckDynamicCast() { return; } + // Warns when dynamic_cast is used with RTTI data disabled. + if (!Self.getLangOpts().RTTIData) { + bool MicrosoftABI = + Self.getASTContext().getTargetInfo().getCXXABI().isMicrosoft(); + bool isClangCL = Self.getDiagnostics().getDiagnosticOptions().getFormat() == + DiagnosticOptions::MSVC; + if (MicrosoftABI || !DestPointee->isVoidType()) + Self.Diag(OpRange.getBegin(), + diag::warn_no_dynamic_cast_with_rtti_disabled) + << isClangCL; + } + // Done. Everything else is run-time checks. Kind = CK_Dynamic; } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d1fcdf35452788..8f8847e638040a 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -646,6 +646,12 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc, return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti)); } + // Warns when typeid is used with RTTI data disabled. 
+ if (!getLangOpts().RTTIData) + Diag(OpLoc, diag::warn_no_typeid_with_rtti_disabled) + << (getDiagnostics().getDiagnosticOptions().getFormat() == + DiagnosticOptions::MSVC); + QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl); if (isType) { diff --git a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp new file mode 100644 index 00000000000000..d2c007fd8c297b --- /dev/null +++ b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 %s -triple x86_64-windows -fdiagnostics-format msvc -fno-rtti-data -fsyntax-only -verify + +namespace std { +struct type_info {}; +} // namespace std +class B { +public: + virtual ~B() = default; +}; + +class D1 : public B { +public: + ~D1() = default; +}; + +void f() { + B* b = new D1(); + auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} + void* v = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} + (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}} +} diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp new file mode 100644 index 00000000000000..4db21d36f4a998 --- /dev/null +++ b/clang/test/SemaCXX/no_dynamic_cast.cpp @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 %s -fno-rtti-data -fsyntax-only -verify + +namespace std { +struct type_info {}; +} // namespace std +class B { +public: + virtual ~B() = default; +}; + +class D1 : public B { +public: + ~D1() = default; +}; + +void f() { + B* b = new D1(); + auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by -fno-rtti-data}} + void* v = dynamic_cast(b); + (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}} +} From 0a63679267e4a2e81c6b193c25ed2579c65eb824 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 7 Sep 2020 23:58:54 +0000 Subject: [PATCH 026/161] Add documentation for getDependentDialects() in the PassManagement infra docs Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D87181 --- mlir/docs/PassManagement.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index 92ca92218219c0..6e577db4501c1d 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -104,6 +104,15 @@ struct MyOperationPass : public OperationPass { }; ``` +### Dependent Dialects + +Dialects must be loaded in the MLIRContext before entities from these dialects +(operations, types, attributes, ...) can be created. Dialects must be loaded +before starting the multi-threaded pass pipeline execution. To this end, a pass +that can create an entity from a dialect that isn't already loaded must express +this by overriding the `getDependentDialects()` method and declare this list of +Dialects explicitly. + ## Analysis Management An important concept, along with transformation passes, are analyses. These are @@ -684,6 +693,8 @@ It contains the following fields: * description - A longer, more detailed description of the pass. This is used when generating pass documentation. +* dependentDialects + - A list of strings that are the Dialect classes this pass can introduce. * constructor - A piece of C++ code used to create a default instance of the pass. 
* options From 63d1dc66658fa072c6e0caba6c97e00da37555ce Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Sep 2020 00:06:37 +0000 Subject: [PATCH 027/161] Add a doc/tutorial on traversing the IR Reviewed By: stephenneuendorffer Differential Revision: https://reviews.llvm.org/D87221 --- .../Tutorials/UnderstandingTheIRStructure.md | 287 ++++++++++++++++++ mlir/docs/includes/img/DefUseChains.svg | 1 + mlir/docs/includes/img/Use-list.svg | 1 + mlir/test/IR/print-ir-defuse.mlir | 31 ++ mlir/test/IR/print-ir-nesting.mlir | 57 ++++ mlir/test/lib/IR/CMakeLists.txt | 2 + mlir/test/lib/IR/TestPrintDefUse.cpp | 71 +++++ mlir/test/lib/IR/TestPrintNesting.cpp | 96 ++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 4 + 9 files changed, 550 insertions(+) create mode 100644 mlir/docs/Tutorials/UnderstandingTheIRStructure.md create mode 100644 mlir/docs/includes/img/DefUseChains.svg create mode 100644 mlir/docs/includes/img/Use-list.svg create mode 100644 mlir/test/IR/print-ir-defuse.mlir create mode 100644 mlir/test/IR/print-ir-nesting.mlir create mode 100644 mlir/test/lib/IR/TestPrintDefUse.cpp create mode 100644 mlir/test/lib/IR/TestPrintNesting.cpp diff --git a/mlir/docs/Tutorials/UnderstandingTheIRStructure.md b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md new file mode 100644 index 00000000000000..8b4f7724741fa3 --- /dev/null +++ b/mlir/docs/Tutorials/UnderstandingTheIRStructure.md @@ -0,0 +1,287 @@ +# Understanding the IR Structure + +The MLIR Language Reference describes the +[High Level Structure](../LangRef/#high-level-structure), this document +illustrates this structure through examples, and introduces at the same time the +C++ APIs involved in manipulating it. + +We will implement a [pass](../PassManagement/#operation-pass) that traverses any +MLIR input and prints the entity inside the IR. A pass (or in general almost any +piece of IR) is always rooted with an operation. Most of the time the top-level +operation is a `ModuleOp`, the MLIR `PassManager` is actually limited to +operation on a top-level `ModuleOp`. As such a pass starts with an operation, +and so will our traversal: + +``` + void runOnOperation() override { + Operation *op = getOperation(); + resetIndent(); + printOperation(op); + } +``` + +## Traversing the IR Nesting + +The IR is recursively nested, an `Operation` can have one or multiple nested +`Region`s, each of which is actually a list of `Blocks`, each of which itself +wraps a list of `Operation`s. Our traversal will follow this structure with +three methods: `printOperation()`, `printRegion()`, and `printBlock()`. + +The first method inspects the properties of an operation, before iterating on +the nested regions and print them individually: + +```c++ + void printOperation(Operation *op) { + // Print the operation itself and some of its properties + printIndent() << "visiting op: '" << op->getName() << "' with " + << op->getNumOperands() << " operands and " + << op->getNumResults() << " results\n"; + // Print the operation attributes + if (!op->getAttrs().empty()) { + printIndent() << op->getAttrs().size() << " attributes:\n"; + for (NamedAttribute attr : op->getAttrs()) + printIndent() << " - '" << attr.first << "' : '" << attr.second + << "'\n"; + } + + // Recurse into each of the regions attached to the operation. 
+ printIndent() << " " << op->getNumRegions() << " nested regions:\n"; + auto indent = pushIndent(); + for (Region ®ion : op->getRegions()) + printRegion(region); + } +``` + +A `Region` does not hold anything other than a list of `Block`s: + +```c++ + void printRegion(Region ®ion) { + // A region does not hold anything by itself other than a list of blocks. + printIndent() << "Region with " << region.getBlocks().size() + << " blocks:\n"; + auto indent = pushIndent(); + for (Block &block : region.getBlocks()) + printBlock(block); + } +``` + +Finally, a `Block` has a list of arguments, and holds a list of `Operation`s: + +```c++ + void printBlock(Block &block) { + // Print the block intrinsics properties (basically: argument list) + printIndent() + << "Block with " << block.getNumArguments() << " arguments, " + << block.getNumSuccessors() + << " successors, and " + // Note, this `.size()` is traversing a linked-list and is O(n). + << block.getOperations().size() << " operations\n"; + + // A block main role is to hold a list of Operations: let's recurse into + // printing each operation. + auto indent = pushIndent(); + for (Operation &op : block.getOperations()) + printOperation(&op); + } +``` + +The code for the pass is available +[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintNesting.cpp) +and can be exercised with `mlir-opt -test-print-nesting`. + +### Example + +The Pass introduced in the previous section can be applied on the following IR +with `mlir-opt -test-print-nesting -allow-unregistered-dialect +llvm-project/mlir/test/IR/print-ir-nesting.mlir`: + +```mlir +"module"() ( { + %0:4 = "dialect.op1"() {"attribute name" = 42 : i32} : () -> (i1, i16, i32, i64) + "dialect.op2"() ( { + "dialect.innerop1"(%0#0, %0#1) : (i1, i16) -> () + }, { + "dialect.innerop2"() : () -> () + "dialect.innerop3"(%0#0, %0#2, %0#3)[^bb1, ^bb2] : (i1, i32, i64) -> () + ^bb1(%1: i32): // pred: ^bb0 + "dialect.innerop4"() : () -> () + "dialect.innerop5"() : () -> () + ^bb2(%2: i64): // pred: ^bb0 + "dialect.innerop6"() : () -> () + "dialect.innerop7"() : () -> () + }) {"other attribute" = 42 : i64} : () -> () + "module_terminator"() : () -> () +}) : () -> () +``` + +And will yield the following output: + +``` +visiting op: 'module' with 0 operands and 0 results + 1 nested regions: + Region with 1 blocks: + Block with 0 arguments, 0 successors, and 3 operations + visiting op: 'dialect.op1' with 0 operands and 4 results + 1 attributes: + - 'attribute name' : '42 : i32' + 0 nested regions: + visiting op: 'dialect.op2' with 0 operands and 0 results + 2 nested regions: + Region with 1 blocks: + Block with 0 arguments, 0 successors, and 1 operations + visiting op: 'dialect.innerop1' with 2 operands and 0 results + 0 nested regions: + Region with 3 blocks: + Block with 0 arguments, 2 successors, and 2 operations + visiting op: 'dialect.innerop2' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop3' with 3 operands and 0 results + 0 nested regions: + Block with 1 arguments, 0 successors, and 2 operations + visiting op: 'dialect.innerop4' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop5' with 0 operands and 0 results + 0 nested regions: + Block with 1 arguments, 0 successors, and 2 operations + visiting op: 'dialect.innerop6' with 0 operands and 0 results + 0 nested regions: + visiting op: 'dialect.innerop7' with 0 operands and 0 results + 0 nested regions: + visiting op: 'module_terminator' with 0 operands and 0 
results + 0 nested regions: +``` + +## Other IR Traversal Methods. + +In many cases, unwrapping the recursive structure of the IR is cumbersome and +you may be interested in using other helpers. + +### Filtered iterator: `getOps()` + +For example the `Block` class exposes a convenient templated method +`getOps()` that provided a filtered iterator. Here is an example: + +```c++ + auto varOps = entryBlock.getOps(); + for (spirv::GlobalVariableOp gvOp : varOps) { + // process each GlobalVariable Operation in the block. + ... + } +``` + +Similarly, the `Region` class exposes the same `getOps` method that will iterate +on all the blocks in the region. + +### Walkers + +The `getOps()` is useful to iterate on some Operations immediately listed +inside a single block (or a single region), however it is frequently interesting +to traverse the IR in a nested fashion. To this end MLIR exposes the `walk()` +helper on `Operation`, `Block`, and `Region`. This helper takes a single +argument: a callback method that will be invoked for every operation recursively +nested under the provided entity. + +```c++ + // Recursively traverse all the regions and blocks nested inside the function + // and apply the callback on every single operation in post-order. + getFunction().walk([&](mlir::Operation *op) { + // process Operation `op`. + }); +``` + +The provided callback can be specialized to filter on a particular type of +Operation, for example the following will apply the callback only on `LinalgOp` +operations nested inside the function: + +```c++ + getFunction.walk([](LinalgOp linalgOp) { + // process LinalgOp `linalgOp`. + }); +``` + +Finally, the callback can optionally stop the walk by returning a +`WalkResult::interrupt()` value. For example the following walk will find all +`AllocOp` nested inside the function and interrupt the traversal if one of them +does not satisfy a criteria: + +```c++ + WalkResult result = getFunction().walk([&](AllocOp allocOp) { + if (!isValid(allocOp)) + return WalkResult::interrupt(); + return WalkResult::advance(); + }); + if (result.wasInterrupted()) + // One alloc wasn't matching. + ... +``` + +## Traversing the def-use chains + +Another relationship in the IR is the one that links a `Value` with its users. +As defined in the +[language reference](https://mlir.llvm.org/docs/LangRef/#high-level-structure), +each Value is either a `BlockArgument` or the result of exactly one `Operation` +(an `Operation` can have multiple results, each of them is a separate `Value`). +The users of a `Value` are `Operation`s, through their arguments: each +`Operation` argument references a single `Value`. + +Here is a code sample that inspects the operands of an `Operation` and prints +some information about them: + +```c++ + // Print information about the producer of each of the operands. + for (Value operand : op->getOperands()) { + if (Operation *producer = operand.getDefiningOp()) { + llvm::outs() << " - Operand produced by operation '" + << producer->getName() << "'\n"; + } else { + // If there is no defining op, the Value is necessarily a Block + // argument. + auto blockArg = operand.cast(); + llvm::outs() << " - Operand produced by Block argument, number " + << blockArg.getArgNumber() << "\n"; + } + } +``` + +Similarly, the following code sample iterates through the result `Value`s +produced by an `Operation` and for each result will iterate the users of these +results and print informations about them: + +```c++ + // Print information about the user of each of the result. 
+ llvm::outs() << "Has " << op->getNumResults() << " results:\n"; + for (auto indexedResult : llvm::enumerate(op->getResults())) { + Value result = indexedResult.value(); + llvm::outs() << " - Result " << indexedResult.index(); + if (result.use_empty()) { + llvm::outs() << " has no uses\n"; + continue; + } + if (result.hasOneUse()) { + llvm::outs() << " has a single use: "; + } else { + llvm::outs() << " has " + << std::distance(result.getUses().begin(), + result.getUses().end()) + << " uses:\n"; + } + for (Operation *userOp : result.getUsers()) { + llvm::outs() << " - " << userOp->getName() << "\n"; + } + } +``` + +The illustrating code for this pass is available +[here in the repo](https://github.com/llvm/llvm-project/blob/master/mlir/test/lib/IR/TestPrintDefUse.cpp) +and can be exercised with `mlir-opt -test-print-defuse`. + +The chaining of `Value`s and their uses can be viewed as following: + +![Index Map Example](/includes/img/DefUseChains.svg) + +The uses of a `Value` (`OpOperand` or `BlockOperand`) are also chained in a +doubly linked-list, which is particularly useful when replacing all uses of a +`Value` with a new one ("RAUW"): + +![Index Map Example](/includes/img/Use-list.svg) diff --git a/mlir/docs/includes/img/DefUseChains.svg b/mlir/docs/includes/img/DefUseChains.svg new file mode 100644 index 00000000000000..de74a4e6e82eed --- /dev/null +++ b/mlir/docs/includes/img/DefUseChains.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mlir/docs/includes/img/Use-list.svg b/mlir/docs/includes/img/Use-list.svg new file mode 100644 index 00000000000000..941ac052fd2e44 --- /dev/null +++ b/mlir/docs/includes/img/Use-list.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mlir/test/IR/print-ir-defuse.mlir b/mlir/test/IR/print-ir-defuse.mlir new file mode 100644 index 00000000000000..78c5804119250a --- /dev/null +++ b/mlir/test/IR/print-ir-defuse.mlir @@ -0,0 +1,31 @@ +// RUN: mlir-opt -test-print-defuse -allow-unregistered-dialect %s | FileCheck %s + +// CHECK: Visiting op 'dialect.op1' with 0 operands: +// CHECK: Has 4 results: +// CHECK: - Result 0 has a single use: - dialect.op2 +// CHECK: - Result 1 has no uses +// CHECK: - Result 2 has 2 uses: +// CHECK: - dialect.innerop1 +// CHECK: - dialect.op2 +// CHECK: - Result 3 has no uses +// CHECK: Visiting op 'dialect.op2' with 2 operands: +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: Has 0 results: +// CHECK: Visiting op 'dialect.innerop1' with 2 operands: +// CHECK: - Operand produced by Block argument, number 0 +// CHECK: - Operand produced by operation 'dialect.op1' +// CHECK: Has 0 results: +// CHECK: Visiting op 'dialect.op3' with 0 operands: +// CHECK: Has 0 results: +// CHECK: Visiting op 'module_terminator' with 0 operands: +// CHECK: Has 0 results: +// CHECK: Visiting op 'module' with 0 operands: +// CHECK: Has 0 results: + +%results:4 = "dialect.op1"() : () -> (i1, i16, i32, i64) +"dialect.op2"(%results#0, %results#2) : (i1, i32) -> () +"dialect.op3"() ({ + ^bb0(%arg0 : i1): + "dialect.innerop1"(%arg0, %results#2) : (i1, i32) -> () +}) : () -> () diff --git a/mlir/test/IR/print-ir-nesting.mlir b/mlir/test/IR/print-ir-nesting.mlir new file mode 100644 index 00000000000000..4682753947550c --- /dev/null +++ b/mlir/test/IR/print-ir-nesting.mlir @@ -0,0 +1,57 @@ +// RUN: mlir-opt -test-print-nesting -allow-unregistered-dialect %s | FileCheck %s + +// CHECK: visiting op: 'module' with 0 operands and 0 results +// CHECK: 1 nested 
regions: +// CHECK: Region with 1 blocks: +// CHECK: Block with 0 arguments, 0 successors, and 3 operations +module { + + +// CHECK: visiting op: 'dialect.op1' with 0 operands and 4 results +// CHECK: 1 attributes: +// CHECK: - 'attribute name' : '42 : i32' +// CHECK: 0 nested regions: + %results:4 = "dialect.op1"() { "attribute name" = 42 : i32 } : () -> (i1, i16, i32, i64) + + +// CHECK: visiting op: 'dialect.op2' with 0 operands and 0 results +// CHECK: 2 nested regions: + "dialect.op2"() ({ + +// CHECK: Region with 1 blocks: +// CHECK: Block with 0 arguments, 0 successors, and 1 operations +// CHECK: visiting op: 'dialect.innerop1' with 2 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop1"(%results#0, %results#1) : (i1, i16) -> () + +// CHECK: Region with 3 blocks: + },{ + +// CHECK: Block with 0 arguments, 2 successors, and 2 operations +// CHECK: visiting op: 'dialect.innerop2' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop2"() : () -> () +// CHECK: visiting op: 'dialect.innerop3' with 3 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop3"(%results#0, %results#2, %results#3)[^bb1, ^bb2] : (i1, i32, i64) -> () +// CHECK: Block with 1 arguments, 0 successors, and 2 operations + ^bb1(%arg1 : i32): +// CHECK: visiting op: 'dialect.innerop4' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop4"() : () -> () +// CHECK: visiting op: 'dialect.innerop5' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop5"() : () -> () +// CHECK: Block with 1 arguments, 0 successors, and 2 operations + ^bb2(%arg2 : i64): +// CHECK: visiting op: 'dialect.innerop6' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop6"() : () -> () +// CHECK: visiting op: 'dialect.innerop7' with 0 operands and 0 results +// CHECK: 0 nested regions: + "dialect.innerop7"() : () -> () + }) : () -> () + +// CHECK: visiting op: 'module_terminator' with 0 operands and 0 results + +} // module diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt index f77b26e5ca1848..cf4ecada0f3cb5 100644 --- a/mlir/test/lib/IR/CMakeLists.txt +++ b/mlir/test/lib/IR/CMakeLists.txt @@ -3,6 +3,8 @@ add_mlir_library(MLIRTestIR TestFunc.cpp TestInterfaces.cpp TestMatchers.cpp + TestPrintDefUse.cpp + TestPrintNesting.cpp TestSideEffects.cpp TestSymbolUses.cpp TestTypes.cpp diff --git a/mlir/test/lib/IR/TestPrintDefUse.cpp b/mlir/test/lib/IR/TestPrintDefUse.cpp new file mode 100644 index 00000000000000..3153a148477a9c --- /dev/null +++ b/mlir/test/lib/IR/TestPrintDefUse.cpp @@ -0,0 +1,71 @@ +//===- TestPrintDefUse.cpp - Passes to illustrate the IR def-use chains ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { +/// This pass illustrates the IR def-use chains through printing. +struct TestPrintDefUsePass + : public PassWrapper> { + void runOnOperation() override { + // Recursively traverse the IR nested under the current operation and print + // every single operation and their operands and users. 
+ getOperation()->walk([](Operation *op) { + llvm::outs() << "Visiting op '" << op->getName() << "' with " + << op->getNumOperands() << " operands:\n"; + + // Print information about the producer of each of the operands. + for (Value operand : op->getOperands()) { + if (Operation *producer = operand.getDefiningOp()) { + llvm::outs() << " - Operand produced by operation '" + << producer->getName() << "'\n"; + } else { + // If there is no defining op, the Value is necessarily a Block + // argument. + auto blockArg = operand.cast(); + llvm::outs() << " - Operand produced by Block argument, number " + << blockArg.getArgNumber() << "\n"; + } + } + + // Print information about the user of each of the result. + llvm::outs() << "Has " << op->getNumResults() << " results:\n"; + for (auto indexedResult : llvm::enumerate(op->getResults())) { + Value result = indexedResult.value(); + llvm::outs() << " - Result " << indexedResult.index(); + if (result.use_empty()) { + llvm::outs() << " has no uses\n"; + continue; + } + if (result.hasOneUse()) { + llvm::outs() << " has a single use: "; + } else { + llvm::outs() << " has " + << std::distance(result.getUses().begin(), + result.getUses().end()) + << " uses:\n"; + } + for (Operation *userOp : result.getUsers()) { + llvm::outs() << " - " << userOp->getName() << "\n"; + } + } + }); + } +}; +} // end anonymous namespace + +namespace mlir { +void registerTestPrintDefUsePass() { + PassRegistration("test-print-defuse", + "Test various printing."); +} +} // namespace mlir diff --git a/mlir/test/lib/IR/TestPrintNesting.cpp b/mlir/test/lib/IR/TestPrintNesting.cpp new file mode 100644 index 00000000000000..825d241740fda0 --- /dev/null +++ b/mlir/test/lib/IR/TestPrintNesting.cpp @@ -0,0 +1,96 @@ +//===- TestPrintNesting.cpp - Passes to illustrate the IR nesting ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/Function.h" +#include "mlir/Pass/Pass.h" + +using namespace mlir; + +namespace { +/// This pass illustrates the IR nesting through printing. +struct TestPrintNestingPass + : public PassWrapper> { + // Entry point for the pass. + void runOnOperation() override { + Operation *op = getOperation(); + resetIndent(); + printOperation(op); + } + + /// The three methods below are mutually recursive and follow the nesting of + /// the IR: operation->region->block->operation->... + + void printOperation(Operation *op) { + // Print the operation itself and some of its properties + printIndent() << "visiting op: '" << op->getName() << "' with " + << op->getNumOperands() << " operands and " + << op->getNumResults() << " results\n"; + // Print the operation attributes + if (!op->getAttrs().empty()) { + printIndent() << op->getAttrs().size() << " attributes:\n"; + for (NamedAttribute attr : op->getAttrs()) + printIndent() << " - '" << attr.first << "' : '" << attr.second + << "'\n"; + } + + // Recurse into each of the regions attached to the operation. + printIndent() << " " << op->getNumRegions() << " nested regions:\n"; + auto indent = pushIndent(); + for (Region ®ion : op->getRegions()) + printRegion(region); + } + + void printRegion(Region ®ion) { + // A region does not hold anything by itself other than a list of blocks. 
+ printIndent() << "Region with " << region.getBlocks().size() + << " blocks:\n"; + auto indent = pushIndent(); + for (Block &block : region.getBlocks()) + printBlock(block); + } + + void printBlock(Block &block) { + // Print the block intrinsics properties (basically: argument list) + printIndent() + << "Block with " << block.getNumArguments() << " arguments, " + << block.getNumSuccessors() + << " successors, and " + // Note, this `.size()` is traversing a linked-list and is O(n). + << block.getOperations().size() << " operations\n"; + + // Block main role is to hold a list of Operations: let's recurse. + auto indent = pushIndent(); + for (Operation &op : block.getOperations()) + printOperation(&op); + } + + /// Manages the indentation as we traverse the IR nesting. + int indent; + struct IdentRAII { + int &indent; + IdentRAII(int &indent) : indent(indent) {} + ~IdentRAII() { --indent; } + }; + void resetIndent() { indent = 0; } + IdentRAII pushIndent() { return IdentRAII(++indent); } + + llvm::raw_ostream &printIndent() { + for (int i = 0; i < indent; ++i) + llvm::outs() << " "; + return llvm::outs(); + } +}; +} // end anonymous namespace + +namespace mlir { +void registerTestPrintNestingPass() { + PassRegistration("test-print-nesting", + "Test various printing."); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index ad76abed647e7b..34e03a5f992017 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -66,6 +66,8 @@ void registerTestMemRefDependenceCheck(); void registerTestMemRefStrideCalculation(); void registerTestOpaqueLoc(); void registerTestPreparationPassWithAllowedMemrefResults(); +void registerTestPrintDefUsePass(); +void registerTestPrintNestingPass(); void registerTestRecursiveTypesPass(); void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); @@ -115,6 +117,8 @@ void registerTestPasses() { registerTestMemRefStrideCalculation(); registerTestOpaqueLoc(); registerTestPreparationPassWithAllowedMemrefResults(); + registerTestPrintDefUsePass(); + registerTestPrintNestingPass(); registerTestRecursiveTypesPass(); registerTestReducer(); registerTestGpuParallelLoopMappingPass(); From 8dcd6ea644cf86aba3dea5b1d3c1af4f350d22ab Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Sep 2020 00:56:10 +0000 Subject: [PATCH 028/161] Update SVG images to be properly cropped (NFC) --- mlir/docs/includes/img/DefUseChains.svg | 2 +- mlir/docs/includes/img/Use-list.svg | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/docs/includes/img/DefUseChains.svg b/mlir/docs/includes/img/DefUseChains.svg index de74a4e6e82eed..2d5b75246772ab 100644 --- a/mlir/docs/includes/img/DefUseChains.svg +++ b/mlir/docs/includes/img/DefUseChains.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/mlir/docs/includes/img/Use-list.svg b/mlir/docs/includes/img/Use-list.svg index 941ac052fd2e44..4840619f067412 100644 --- a/mlir/docs/includes/img/Use-list.svg +++ b/mlir/docs/includes/img/Use-list.svg @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file From 35f708a3c9ffceacbeaf8abfb0ba5123e346b30e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Sep 2020 17:57:39 -0700 Subject: [PATCH 029/161] [builtins] Inline __paritysi2 into __paritydi2 and inline __paritydi2 into __parityti2. No point in making __parityti2 go through 2 calls to get to __paritysi2. 
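For reference, the parity reduction both callers now inline is the usual bit-folding trick: XOR the halves of the word together until only a nibble is left, then use that nibble to index the constant 0x6996, whose bits record the parity of the values 0 through 15. A minimal standalone sketch of the idea (illustrative only, not the exact library code):

    // Parity of a 32-bit value. Fold the halves together until only the
    // low nibble matters, then look it up in 0x6996 (binary 0110100110010110),
    // bit i of which is the parity of i.
    int parity32(unsigned x) {
      x ^= x >> 16;
      x ^= x >> 8;
      x ^= x >> 4;
      return (0x6996 >> (x & 0xF)) & 1;
    }

The 64-bit and 128-bit builtins simply XOR their halves down to a 32-bit value first and then apply the same fold, which is what this change open-codes in __paritydi2 and __parityti2.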
Reviewed By: MaskRay, efriedma Differential Revision: https://reviews.llvm.org/D87218 --- compiler-rt/lib/builtins/paritydi2.c | 6 +++++- compiler-rt/lib/builtins/parityti2.c | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/paritydi2.c b/compiler-rt/lib/builtins/paritydi2.c index 58e85f89e04371..350dceb8cef592 100644 --- a/compiler-rt/lib/builtins/paritydi2.c +++ b/compiler-rt/lib/builtins/paritydi2.c @@ -17,5 +17,9 @@ COMPILER_RT_ABI int __paritydi2(di_int a) { dwords x; x.all = a; - return __paritysi2(x.s.high ^ x.s.low); + su_int x2 = x.s.high ^ x.s.low; + x2 ^= x2 >> 16; + x2 ^= x2 >> 8; + x2 ^= x2 >> 4; + return (0x6996 >> (x2 & 0xF)) & 1; } diff --git a/compiler-rt/lib/builtins/parityti2.c b/compiler-rt/lib/builtins/parityti2.c index 79e920d8a02df5..011c8dd455620c 100644 --- a/compiler-rt/lib/builtins/parityti2.c +++ b/compiler-rt/lib/builtins/parityti2.c @@ -18,8 +18,14 @@ COMPILER_RT_ABI int __parityti2(ti_int a) { twords x; + dwords x2; x.all = a; - return __paritydi2(x.s.high ^ x.s.low); + x2.all = x.s.high ^ x.s.low; + su_int x3 = x2.s.high ^ x2.s.low; + x3 ^= x3 >> 16; + x3 ^= x3 >> 8; + x3 ^= x3 >> 4; + return (0x6996 >> (x3 & 0xF)) & 1; } #endif // CRT_HAS_128BIT From 4536c6acb3809eaadc836f24f091db1b50b82af9 Mon Sep 17 00:00:00 2001 From: Kiran Kumar T P Date: Tue, 8 Sep 2020 06:52:07 +0530 Subject: [PATCH 030/161] [flang][OpenMP] Enhance parser support for atomic construct to OpenMP 5.0 Summary: This patch enhances parser support for atomic construct to OpenMP 5.0. 2.17.7 atomic -> ATOMIC [clause [,]] atomic-clause [[,] clause] | ATOMIC [clause] clause -> memory-order-clause | HINT(hint-expression) memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED atomic-clause -> READ | WRITE | UPDATE | CAPTURE The patch includes code changes and testcase modifications. Reviewed By: DavidTruby, kiranchandramohan, sameeranjoshi Differential Revision: https://reviews.llvm.org/D82931 --- flang/docs/OpenMP-4.5-grammar.txt | 2 + flang/include/flang/Parser/dump-parse-tree.h | 16 ++-- flang/include/flang/Parser/parse-tree.h | 57 +++++++----- flang/lib/Parser/openmp-parsers.cpp | 91 ++++++++++++-------- flang/lib/Parser/unparse.cpp | 54 ++++++++---- flang/test/Semantics/omp-atomic.f90 | 22 ++++- 6 files changed, 160 insertions(+), 82 deletions(-) diff --git a/flang/docs/OpenMP-4.5-grammar.txt b/flang/docs/OpenMP-4.5-grammar.txt index c74072ba1ef27c..180494bbf509ea 100644 --- a/flang/docs/OpenMP-4.5-grammar.txt +++ b/flang/docs/OpenMP-4.5-grammar.txt @@ -344,6 +344,8 @@ ATOMIC [seq_cst] atomic-clause -> READ | WRITE | UPDATE | CAPTURE +2.13.6 end-atomic -> END ATOMIC + 2.13.7 flush -> FLUSH [(variable-name-list)] 2.13.8 ordered -> ORDERED ordered-construct-clause [[[,] ordered-construct-clause]...] 
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 41ff9631d1011c..921e6172bf89ba 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -445,6 +445,9 @@ class ParseTreeDumper { NODE(parser, OmpAtomicCapture) NODE(OmpAtomicCapture, Stmt1) NODE(OmpAtomicCapture, Stmt2) + NODE(parser, OmpAtomicMemoryOrderClause) + NODE(parser, OmpAtomicMemoryOrderClauseList) + NODE(parser, OmpAtomicMemoryOrderClausePostList) NODE(parser, OmpAtomicRead) NODE(parser, OmpAtomicUpdate) NODE(parser, OmpAtomicWrite) @@ -464,7 +467,6 @@ class ParseTreeDumper { #include "llvm/Frontend/OpenMP/OMP.cpp.inc" NODE(parser, OmpClauseList) NODE(parser, OmpCriticalDirective) - NODE(OmpCriticalDirective, Hint) NODE(parser, OmpDeclareTargetSpecifier) NODE(parser, OmpDeclareTargetWithClause) NODE(parser, OmpDeclareTargetWithList) @@ -487,6 +489,7 @@ class ParseTreeDumper { NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) + NODE(parser, OmpHintExpr) NODE(parser, OmpIfClause) NODE_ENUM(OmpIfClause, DirectiveNameModifier) NODE(parser, OmpLinearClause) @@ -499,10 +502,12 @@ class ParseTreeDumper { NODE(parser, OmpMapType) NODE(OmpMapType, Always) NODE_ENUM(OmpMapType, Type) - NODE(parser, OmpMemoryClause) - NODE_ENUM(OmpMemoryClause, MemoryOrder) - NODE(parser, OmpMemoryClauseList) - NODE(parser, OmpMemoryClausePostList) + NODE(parser, OmpMemoryOrderClause) + static std::string GetNodeName(const llvm::omp::Clause &x) { + return llvm::Twine( + "llvm::omp::Clause = ", llvm::omp::getOpenMPClauseName(x)) + .str(); + } NODE(parser, OmpNowait) NODE(parser, OmpObject) NODE(parser, OmpObjectList) @@ -549,7 +554,6 @@ class ParseTreeDumper { NODE(parser, OpenMPDeclareSimdConstruct) NODE(parser, OpenMPDeclareTargetConstruct) NODE(parser, OmpFlushMemoryClause) - NODE_ENUM(OmpFlushMemoryClause, FlushMemoryOrder) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPSimpleStandaloneConstruct) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 166e573b5cec34..a9fb92cf2584b7 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3591,12 +3591,14 @@ struct OpenMPDeclarativeConstruct { u; }; +// HINT(hint-expression) +WRAPPER_CLASS(OmpHintExpr, ConstantExpr); + // 2.13.2 CRITICAL [Name] END CRITICAL [Name] struct OmpCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpCriticalDirective); - WRAPPER_CLASS(Hint, ConstantExpr); CharBlock source; - std::tuple, std::optional> t; + std::tuple, std::optional> t; }; struct OmpEndCriticalDirective { TUPLE_CLASS_BOILERPLATE(OmpEndCriticalDirective); @@ -3608,44 +3610,56 @@ struct OpenMPCriticalConstruct { std::tuple t; }; -// 2.13.6 atomic -> ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | -// ATOMIC [seq_cst] +// 2.17.7 atomic -> ATOMIC [clause[,]] atomic-clause [[,]clause] | +// ATOMIC [clause] +// clause -> memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED // atomic-clause -> READ | WRITE | UPDATE | CAPTURE // END ATOMIC EMPTY_CLASS(OmpEndAtomic); -// ATOMIC Memory related clause -struct OmpMemoryClause { - ENUM_CLASS(MemoryOrder, SeqCst) - WRAPPER_CLASS_BOILERPLATE(OmpMemoryClause, MemoryOrder); +// Memory order clause +struct OmpMemoryOrderClause { + WRAPPER_CLASS_BOILERPLATE(OmpMemoryOrderClause, llvm::omp::Clause); CharBlock source; }; 
-WRAPPER_CLASS(OmpMemoryClauseList, std::list); -WRAPPER_CLASS(OmpMemoryClausePostList, std::list); +// ATOMIC Memory order clause or hint expression +struct OmpAtomicMemoryOrderClause { + UNION_CLASS_BOILERPLATE(OmpAtomicMemoryOrderClause); + std::variant u; +}; + +WRAPPER_CLASS( + OmpAtomicMemoryOrderClauseList, std::list); +WRAPPER_CLASS( + OmpAtomicMemoryOrderClausePostList, std::list); // ATOMIC READ struct OmpAtomicRead { TUPLE_CLASS_BOILERPLATE(OmpAtomicRead); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC WRITE struct OmpAtomicWrite { TUPLE_CLASS_BOILERPLATE(OmpAtomicWrite); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; // ATOMIC UPDATE struct OmpAtomicUpdate { TUPLE_CLASS_BOILERPLATE(OmpAtomicUpdate); - std::tuple, std::optional> + std::tuple, + std::optional> t; }; @@ -3654,16 +3668,16 @@ struct OmpAtomicCapture { TUPLE_CLASS_BOILERPLATE(OmpAtomicCapture); WRAPPER_CLASS(Stmt1, Statement); WRAPPER_CLASS(Stmt2, Statement); - std::tuple + std::tuple t; }; // ATOMIC struct OmpAtomic { TUPLE_CLASS_BOILERPLATE(OmpAtomic); - std::tuple, - std::optional> + std::tuple, std::optional> t; }; @@ -3707,8 +3721,7 @@ struct OpenMPCancelConstruct { // release // acquire struct OmpFlushMemoryClause { - ENUM_CLASS(FlushMemoryOrder, AcqRel, Release, Acquire) - WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, FlushMemoryOrder); + WRAPPER_CLASS_BOILERPLATE(OmpFlushMemoryClause, llvm::omp::Clause); CharBlock source; }; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index cd5ee0de556dc0..a7f4a1ae492c78 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -300,9 +300,9 @@ TYPE_PARSER(sourced(construct(verbatim("CANCEL"_tok), // release // acquire TYPE_PARSER(sourced(construct( - "ACQ_REL" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::AcqRel) || - "RELEASE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Release) || - "ACQUIRE" >> pure(OmpFlushMemoryClause::FlushMemoryOrder::Acquire)))) + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire)))) TYPE_PARSER(sourced(construct(verbatim("FLUSH"_tok), maybe(Parser{}), @@ -384,51 +384,74 @@ TYPE_PARSER(construct(Parser{}) || construct(Parser{}, parenthesized(optionalList(actualArgSpec)))))) -// 2.13.6 ATOMIC [seq_cst[,]] atomic-clause [[,]seq_cst] | ATOMIC [seq_cst] -// atomic-clause -> READ | WRITE | UPDATE | CAPTURE +// Hint Expression => HINT(hint-expression) +TYPE_PARSER("HINT" >> construct(parenthesized(constantExpr))) + +// 2.17.7 atomic -> ATOMIC [clause [,]] atomic-clause [[,] clause] | +// ATOMIC [clause] +// clause -> memory-order-clause | HINT(hint-expression) +// memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | RELAXED +// atomic-clause -> READ | WRITE | UPDATE | CAPTURE // OMP END ATOMIC TYPE_PARSER(construct(startOmpLine >> "END ATOMIC"_tok)) -// ATOMIC Memory related clause -TYPE_PARSER(sourced(construct( - "SEQ_CST" >> pure(OmpMemoryClause::MemoryOrder::SeqCst)))) +// Memory order clause +TYPE_PARSER(sourced(construct( + "SEQ_CST" >> pure(llvm::omp::Clause::OMPC_seq_cst) || + "ACQ_REL" >> pure(llvm::omp::Clause::OMPC_acq_rel) || + "RELEASE" >> pure(llvm::omp::Clause::OMPC_release) || + "ACQUIRE" >> pure(llvm::omp::Clause::OMPC_acquire) || + "RELAXED" >> pure(llvm::omp::Clause::OMPC_relaxed)))) -// ATOMIC Memory Clause List -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC 
Memory order clause or Hint expression +TYPE_PARSER( + construct(Parser{}) || + construct(Parser{})) -TYPE_PARSER(construct( - many(maybe(","_tok) >> Parser{}))) +// ATOMIC Memory order Clause List +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP [SEQ_CST] ATOMIC READ [SEQ_CST] -TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("READ"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), maybe(Parser{} / endOmpLine))) +TYPE_PARSER(construct( + many(maybe(","_tok) >> Parser{}))) -// OMP ATOMIC [SEQ_CST] CAPTURE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] READ [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("CAPTURE"_tok), Parser{} / endOmpLine, - statement(assignmentStmt), statement(assignmentStmt), - Parser{} / endOmpLine)) + construct( + Parser{} / maybe(","_tok), + verbatim("READ"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] UPDATE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] CAPTURE [MEMORY-ORDER-CLAUSE-LIST] +TYPE_PARSER( + "ATOMIC" >> construct( + Parser{} / maybe(","_tok), + verbatim("CAPTURE"_tok), + Parser{} / endOmpLine, + statement(assignmentStmt), statement(assignmentStmt), + Parser{} / endOmpLine)) + +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] UPDATE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("UPDATE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("UPDATE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// OMP ATOMIC [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER(construct(verbatim("ATOMIC"_tok), - Parser{} / endOmpLine, statement(assignmentStmt), - maybe(Parser{} / endOmpLine))) + Parser{} / endOmpLine, + statement(assignmentStmt), maybe(Parser{} / endOmpLine))) -// ATOMIC [SEQ_CST] WRITE [SEQ_CST] +// OMP ATOMIC [MEMORY-ORDER-CLAUSE-LIST] WRITE [MEMORY-ORDER-CLAUSE-LIST] TYPE_PARSER("ATOMIC" >> - construct(Parser{} / maybe(","_tok), - verbatim("WRITE"_tok), Parser{} / endOmpLine, + construct( + Parser{} / maybe(","_tok), + verbatim("WRITE"_tok), + Parser{} / endOmpLine, statement(assignmentStmt), maybe(Parser{} / endOmpLine))) // Atomic Construct @@ -444,9 +467,7 @@ TYPE_PARSER(startOmpLine >> verbatim("END CRITICAL"_tok), maybe(parenthesized(name)))) / endOmpLine) TYPE_PARSER(sourced(construct(verbatim("CRITICAL"_tok), - maybe(parenthesized(name)), - maybe("HINT" >> construct( - parenthesized(constantExpr))))) / + maybe(parenthesized(name)), maybe(Parser{}))) / endOmpLine) TYPE_PARSER(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index e26795d0825bb6..ab94aa2e00c262 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2222,19 +2222,36 @@ class UnparseVisitor { break; } } - void Unparse(const OmpMemoryClause &x) { + void Unparse(const OmpHintExpr &x) { Word("HINT("), Walk(x.v), Put(')'); } + void Unparse(const OmpMemoryOrderClause &x) { switch (x.v) { - case OmpMemoryClause::MemoryOrder::SeqCst: + case llvm::omp::Clause::OMPC_seq_cst: Word("SEQ_CST"); break; + case llvm::omp::Clause::OMPC_acq_rel: + Word("ACQ_REL"); + break; + case llvm::omp::Clause::OMPC_release: + Word("RELEASE"); + break; + case llvm::omp::Clause::OMPC_acquire: + Word("ACQUIRE"); + break; + case llvm::omp::Clause::OMPC_relaxed: + Word("RELAXED"); + break; + default: + break; } } - void Unparse(const 
OmpMemoryClauseList &x) { Walk(" ", x.v, " "); } - void Unparse(const OmpMemoryClausePostList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClauseList &x) { Walk(" ", x.v, " "); } + void Unparse(const OmpAtomicMemoryOrderClausePostList &x) { + Walk(" ", x.v, " "); + } void Unparse(const OmpAtomic &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2245,9 +2262,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicCapture &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" CAPTURE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get(x.t)); @@ -2260,9 +2277,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicRead &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" READ"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2273,9 +2290,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicUpdate &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" UPDATE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2286,9 +2303,9 @@ class UnparseVisitor { void Unparse(const OmpAtomicWrite &x) { BeginOpenMP(); Word("!$OMP ATOMIC"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Word(" WRITE"); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put("\n"); EndOpenMP(); Walk(std::get>(x.t)); @@ -2300,8 +2317,7 @@ class UnparseVisitor { BeginOpenMP(); Word("!$OMP CRITICAL"); Walk(" (", std::get>(x.t), ")"); - Walk(" HINT(", std::get>(x.t), - ")"); + Walk(std::get>(x.t)); Put("\n"); EndOpenMP(); } @@ -2431,15 +2447,17 @@ class UnparseVisitor { } void Unparse(const OmpFlushMemoryClause &x) { switch (x.v) { - case OmpFlushMemoryClause::FlushMemoryOrder::AcqRel: + case llvm::omp::Clause::OMPC_acq_rel: Word("ACQ_REL "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Release: + case llvm::omp::Clause::OMPC_release: Word("RELEASE "); break; - case OmpFlushMemoryClause::FlushMemoryOrder::Acquire: + case llvm::omp::Clause::OMPC_acquire: Word("ACQUIRE "); break; + default: + break; } } void Unparse(const OpenMPFlushConstruct &x) { diff --git a/flang/test/Semantics/omp-atomic.f90 b/flang/test/Semantics/omp-atomic.f90 index d5cb87aaba32da..8d3f95a770454f 100644 --- a/flang/test/Semantics/omp-atomic.f90 +++ b/flang/test/Semantics/omp-atomic.f90 @@ -1,5 +1,5 @@ ! RUN: %S/test_errors.sh %s %t %f18 -fopenmp - +use omp_lib ! Check OpenMP 2.13.6 atomic Construct a = 1.0 @@ -11,12 +11,32 @@ a = b !$omp end atomic + !$omp atomic read acquire hint(OMP_LOCK_HINT_CONTENDED) + a = b + + !$omp atomic release hint(OMP_LOCK_HINT_UNCONTENDED) write + a = b + !$omp atomic capture seq_cst b = a a = a + 1 !$omp end atomic + !$omp atomic hint(1) acq_rel capture + b = a + a = a + 1 + !$omp end atomic + + !ERROR: expected end of line + !ERROR: expected end of line + !$omp atomic read write + a = a + 1 + !$omp atomic a = a + 1 + + !$omp atomic relaxed + a = a + 1 + !$omp end parallel end From 10af5bad443dd15b79876fbad66d836ab9e9a4ed Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Mon, 7 Sep 2020 18:29:48 -0700 Subject: [PATCH 031/161] [llvm-objcopy] Consolidate and unify version tests In this diff the tests which verify version printing functionality are refactored. 
Since they are not specific to a particular format we move them into tool-version.test and slightly unify (similarly to tool-name.test and tool-help-message.test). Test plan: make check-all Differential revision: https://reviews.llvm.org/D87211 --- .../tools/llvm-objcopy/ELF/objcopy-version.test | 4 ---- .../tools/llvm-objcopy/ELF/strip-version.test | 5 ----- .../MachO/install-name-tool-version.test | 2 -- llvm/test/tools/llvm-objcopy/tool-version.test | 15 +++++++++++++++ 4 files changed, 15 insertions(+), 11 deletions(-) delete mode 100644 llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test delete mode 100644 llvm/test/tools/llvm-objcopy/ELF/strip-version.test delete mode 100644 llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test create mode 100644 llvm/test/tools/llvm-objcopy/tool-version.test diff --git a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test b/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test deleted file mode 100644 index 7494ccd2866d34..00000000000000 --- a/llvm/test/tools/llvm-objcopy/ELF/objcopy-version.test +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: llvm-objcopy --version | FileCheck %s -# RUN: llvm-objcopy -V | FileCheck %s - -# CHECK: {{ version }} diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test b/llvm/test/tools/llvm-objcopy/ELF/strip-version.test deleted file mode 100644 index 4b2f137ce2aad2..00000000000000 --- a/llvm/test/tools/llvm-objcopy/ELF/strip-version.test +++ /dev/null @@ -1,5 +0,0 @@ -# RUN: llvm-strip --version | FileCheck %s -# RUN: llvm-strip -V | FileCheck %s - -# CHECK-DAG: {{ version }} -# CHECK-DAG: GNU strip diff --git a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test b/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test deleted file mode 100644 index 295e5735610122..00000000000000 --- a/llvm/test/tools/llvm-objcopy/MachO/install-name-tool-version.test +++ /dev/null @@ -1,2 +0,0 @@ -# RUN: llvm-install-name-tool --version | FileCheck %s -# CHECK: {{ version }} diff --git a/llvm/test/tools/llvm-objcopy/tool-version.test b/llvm/test/tools/llvm-objcopy/tool-version.test new file mode 100644 index 00000000000000..5fe33eb8e7173b --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/tool-version.test @@ -0,0 +1,15 @@ +# RUN: llvm-objcopy --version | FileCheck --check-prefix=OBJCOPY %s +# RUN: llvm-objcopy -V | FileCheck --check-prefix=OBJCOPY %s + +# RUN: llvm-strip --version | FileCheck --check-prefix=STRIP %s +# RUN: llvm-strip -V | FileCheck --check-prefix=STRIP %s + +# RUN: llvm-install-name-tool --version | FileCheck %s + +# OBJCOPY-DAG: {{ version }} +# OBJCOPY-DAG: GNU objcopy + +# STRIP-DAG: {{ version }} +# STRIP-DAG: GNU strip + +# CHECK: {{ version }} From 3c0b3250230b3847a2a47dfeacfdb794c2285f02 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 8 Sep 2020 11:03:09 +0800 Subject: [PATCH 032/161] [PowerPC] Implement instruction clustering for stores On Power10, it's profitable to schedule some stores with adjacent target address together. This patch implements this feature. 
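As a self-contained illustration of the clustering criterion (same base register or frame index, a clusterable opcode pair, equal access width, back-to-back byte offsets, and at most two ops already in the cluster), here is a minimal C++ sketch; the MemOp struct and the OP_* values are placeholders for this note and are not the real MachineInstr/PPC instruction types:

  #include <cassert>
  #include <cstdint>

  // Stand-in for one store: an opcode, a base register (or frame index) id,
  // a byte offset from that base, and the access width in bytes.
  struct MemOp {
    unsigned Opcode;
    int Base;
    int64_t Offset;
    unsigned Width;
  };

  // Placeholder opcodes, not the real PPC instruction enumerators.
  enum : unsigned { OP_STD = 0, OP_STW = 1, OP_STW8 = 2 };

  // Two opcodes may be clustered if they are identical, or if they are the
  // 32-bit/64-bit selection variants of the same "stw" store (STW / STW8).
  static bool isClusterablePair(unsigned A, unsigned B) {
    if (A == B)
      return true;
    return (A == OP_STW || A == OP_STW8) && (B == OP_STW || B == OP_STW8);
  }

  // Cluster only pairs with the same base, a clusterable opcode pair, equal
  // widths, and back-to-back addresses; stop growing once more than two ops
  // are already clustered. Callers pass the two ops ordered by offset.
  static bool shouldCluster(const MemOp &First, const MemOp &Second,
                            unsigned NumAlreadyClustered) {
    if (NumAlreadyClustered > 2)
      return false;
    if (First.Base != Second.Base)
      return false;
    if (!isClusterablePair(First.Opcode, Second.Opcode))
      return false;
    if (First.Width != Second.Width)
      return false;
    return First.Offset + First.Width == Second.Offset;
  }

  int main() {
    MemOp A{OP_STD, /*Base=*/3, /*Offset=*/8, /*Width=*/8};
    MemOp B{OP_STD, 3, 16, 8};
    MemOp C{OP_STD, 3, 32, 8};
    assert(shouldCluster(A, B, 0));  // adjacent 8-byte stores: cluster
    assert(!shouldCluster(B, C, 0)); // 8-byte gap: do not cluster
    return 0;
  }

With this rule, two 8-byte stores at offsets 8 and 16 off the same base are scheduled adjacently, while stores separated by a gap, or using different bases, are left alone.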
Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D86754 --- llvm/lib/Target/PowerPC/PPC.td | 11 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 109 ++++++- llvm/lib/Target/PowerPC/PPCInstrInfo.h | 13 + llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 + llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 + llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 4 + .../test/CodeGen/PowerPC/fusion-load-store.ll | 268 ++++++++++++++++++ .../PowerPC/pcrel-call-linkage-leaf.ll | 2 +- 8 files changed, 405 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fusion-load-store.ll diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index a617715d4bd86f..1b38a6f1d13d99 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -174,6 +174,9 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load", "HasAddisLoadFusion", "true", "Power8 Addis-Load fusion", [FeatureFusion]>; +def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", + "Target supports store clustering", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -345,10 +348,12 @@ def ProcessorFeatures { // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. + list FusionFeatures = [FeatureStoreFusion]; list P10AdditionalFeatures = - [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, - FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, - FeaturePairedVectorMemops]; + !listconcat(FusionFeatures, [ + DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, + FeaturePairedVectorMemops]); list P10SpecificFeatures = []; list P10InheritableFeatures = !listconcat(P9InheritableFeatures, P10AdditionalFeatures); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 0732e0f0ace362..2c4549899e0c30 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2222,6 +2222,112 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return true; } +bool PPCInstrInfo::getMemOperandsWithOffsetWidth( + const MachineInstr &LdSt, SmallVectorImpl &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const { + const MachineOperand *BaseOp; + if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) + return false; + BaseOps.push_back(BaseOp); + return true; +} + +static bool isLdStSafeToCluster(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI) { + // If this is a volatile load/store, don't mess with it. + if (LdSt.hasOrderedMemoryRef()) + return false; + + if (LdSt.getOperand(2).isFI()) + return true; + + assert(LdSt.getOperand(2).isReg() && "Expected a reg operand."); + // Can't cluster if the instruction modifies the base register + // or it is update form. e.g. ld r2,3(r2) + if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) + return false; + + return true; +} + +// Only cluster instruction pair that have the same opcode, and they are +// clusterable according to PowerPC specification. 
+static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, + const PPCSubtarget &Subtarget) { + switch (FirstOpc) { + default: + return false; + case PPC::STD: + case PPC::STFD: + case PPC::STXSD: + case PPC::DFSTOREf64: + return FirstOpc == SecondOpc; + // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with + // 32bit and 64bit instruction selection. They are clusterable pair though + // they are different opcode. + case PPC::STW: + case PPC::STW8: + return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; + } +} + +bool PPCInstrInfo::shouldClusterMemOps( + ArrayRef BaseOps1, + ArrayRef BaseOps2, unsigned NumLoads, + unsigned NumBytes) const { + + assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); + const MachineOperand &BaseOp1 = *BaseOps1.front(); + const MachineOperand &BaseOp2 = *BaseOps2.front(); + assert(BaseOp1.isReg() || + BaseOp1.isFI() && + "Only base registers and frame indices are supported."); + + // The NumLoads means the number of loads that has been clustered. + // Don't cluster memory op if there are already two ops clustered at least. + if (NumLoads > 2) + return false; + + // Cluster the load/store only when they have the same base + // register or FI. + if ((BaseOp1.isReg() != BaseOp2.isReg()) || + (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || + (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) + return false; + + // Check if the load/store are clusterable according to the PowerPC + // specification. + const MachineInstr &FirstLdSt = *BaseOp1.getParent(); + const MachineInstr &SecondLdSt = *BaseOp2.getParent(); + unsigned FirstOpc = FirstLdSt.getOpcode(); + unsigned SecondOpc = SecondLdSt.getOpcode(); + const TargetRegisterInfo *TRI = &getRegisterInfo(); + // Cluster the load/store only when they have the same opcode, and they are + // clusterable opcode according to PowerPC specification. + if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) + return false; + + // Can't cluster load/store that have ordered or volatile memory reference. + if (!isLdStSafeToCluster(FirstLdSt, TRI) || + !isLdStSafeToCluster(SecondLdSt, TRI)) + return false; + + int64_t Offset1 = 0, Offset2 = 0; + unsigned Width1 = 0, Width2 = 0; + const MachineOperand *Base1 = nullptr, *Base2 = nullptr; + if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || + !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || + Width1 != Width2) + return false; + + assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 && + "getMemOperandWithOffsetWidth return incorrect base op"); + // The caller should already have ordered FirstMemOp/SecondMemOp by offset. + assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); + return Offset1 + Width1 == Offset2; +} + /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// @@ -4664,7 +4770,8 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( return false; // Handle only loads/stores with base register followed by immediate offset. 
- if (LdSt.getNumExplicitOperands() != 3) + if (!LdSt.getOperand(1).isImm() || + (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) return false; if (!LdSt.getOperand(1).isImm() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 75e8224892f4c5..2f867b16aa24fb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -494,6 +494,19 @@ class PPCInstrInfo : public PPCGenInstrInfo { int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; + /// Get the base operand and byte offset of an instruction that reads/writes + /// memory. + bool getMemOperandsWithOffsetWidth( + const MachineInstr &MI, SmallVectorImpl &BaseOps, + int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, + const TargetRegisterInfo *TRI) const override; + + /// Returns true if the two given memory operations should be scheduled + /// adjacent. + bool shouldClusterMemOps(ArrayRef BaseOps1, + ArrayRef BaseOps2, + unsigned NumLoads, unsigned NumBytes) const override; + /// Return true if two MIs access different memory addresses and false /// otherwise bool diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 8021cfa4a18c6e..05922dbb38fc6a 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -108,6 +108,7 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFloat128 = false; HasFusion = false; + HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 76b43dfc7a723f..0a134bb83ed2fa 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -137,6 +137,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasHTM; bool HasFloat128; bool HasFusion; + bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; @@ -308,6 +309,7 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } + bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } bool needsSwapsForVSXMemOps() const { diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index ea9b37de6ff390..c5671d6c73e055 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -271,6 +271,8 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { std::make_unique(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + if (ST.hasStoreFusion()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); @@ -285,6 +287,8 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( std::make_unique(C) : std::make_unique(C), true); // add DAG Mutations here. 
+ if (ST.hasStoreFusion()) + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); return DAG; diff --git a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll new file mode 100644 index 00000000000000..75b2eca2168c0f --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll @@ -0,0 +1,268 @@ +; Test if several consecutive loads/stores can be clustered(fused) by scheduler. The +; scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x) and SU(y) are fused. + +; REQUIRES: asserts +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ +; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \ +; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s + +define i64 @store_i64(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store i64 %v, i64* %arrayidx3 + ret i64 %v +} + +define i32 @store_i32(i32* nocapture %P, i32 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48 +; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44 +; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52 +; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 13 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14 + store i32 %v, i32* %arrayidx3 + ret i32 %v +} + +define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - 
SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i64_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 + store i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 + store i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 + store i64 %v, i64* %arrayidx3 + ret void +} + +define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12 +; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4 +; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8 +; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_neg:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4 +; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8 +; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12 +; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16 + %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 + store i32 %v, i32* %arrayidx + %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 + store i32 %v, i32* %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 + store i32 %v, i32* %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 + store i32 %v, i32* %arrayidx3 + ret void +} + +define void @store_double(double* nocapture %P, double %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24 +; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8 +; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16 +; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_double:%bb.0 +; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8 +; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16 +; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24 +; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32 + %arrayidx = getelementptr inbounds double, double* %P, i64 3 + store double %v, double* %arrayidx + %arrayidx1 = getelementptr inbounds double, double* %P, i64 1 + store double %v, double* %arrayidx1 + %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 + 
store double %v, double* %arrayidx2 + %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 + store double %v, double* %arrayidx3 + ret void +} + +define void @store_float(float* nocapture %P, float %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12 +; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4 +; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8 +; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_float:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12 +; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4 +; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8 +; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16 + %arrayidx = getelementptr inbounds float, float* %P, i64 3 + store float %v, float* %arrayidx + %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 + store float %v, float* %arrayidx1 + %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 + store float %v, float* %arrayidx2 + %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 + store float %v, float* %arrayidx3 + ret void +} + +; Cannot fuse the store/load if there is volatile in between +define i64 @store_volatile(i64* nocapture %P, i64 %v) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 +; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 +; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 +; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_volatile:%bb.0 +; CHECK-NOT: Cluster ld/st +; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24 +; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16 +; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8 +; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 + %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 + store volatile i64 %v, i64* %arrayidx + %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 + store volatile i64 %v, i64* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 + store volatile i64 %v, i64* %arrayidx2 + %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 + store volatile i64 %v, i64* %arrayidx3 + ret i64 %v +} + +@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4 + +define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]]) +; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + %add = add nsw i32 %n, %m + store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4 + ret void +} + +define void @store_i32_stw8(i32 signext %m, i32 signext %n) { 
+entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) +; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24 +; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28 +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_i32_stw8:%bb.0 +; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) +; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24 +; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28 + store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 + store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 + ret void +} + +declare void @bar(i64*) + +define void @store_frame_index(i32 %a, i32 %b) { +entry: +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: store_frame_index:%bb.0 +; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) +; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf +; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf + %buf = alloca [8 x i64], align 8 + %0 = bitcast [8 x i64]* %buf to i8* + %conv = zext i32 %a to i64 + %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0 + store i64 %conv, i64* %arrayidx, align 8 + %conv1 = zext i32 %b to i64 + %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1 + store i64 %conv1, i64* %arrayidx2, align 8 + call void @bar(i64* nonnull %arrayidx) + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 9141fdc735a0ed..1623889200848a 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -104,6 +104,7 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-P9-NOT: .localentry ; CHECK-ALL: # %bb.0: # %entry ; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r11, r4, r3 ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 @@ -119,7 +120,6 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-S-NEXT: mullw r3, r3, r7 ; CHECK-S-NEXT: sub r2, r6, r7 ; CHECK-S-NEXT: mullw r3, r3, r8 -; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r2 ; CHECK-S-NEXT: mullw r3, r3, r30 From 7907e5516a418fec29137beed3ff985f40e04f17 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 7 Sep 2020 20:55:05 -0700 Subject: [PATCH 033/161] [Sema] fix /gr warning test case --- clang/test/SemaCXX/no-rtti.cpp | 2 +- clang/test/SemaCXX/no_dynamic_cast.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/SemaCXX/no-rtti.cpp b/clang/test/SemaCXX/no-rtti.cpp index e0b57153c24c9b..f8487a0902dda2 100644 --- a/clang/test/SemaCXX/no-rtti.cpp +++ b/clang/test/SemaCXX/no-rtti.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -fno-rtti %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -fno-rtti %s namespace std { class type_info; diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp index 4db21d36f4a998..074b02f4668bcf 100644 --- a/clang/test/SemaCXX/no_dynamic_cast.cpp +++ b/clang/test/SemaCXX/no_dynamic_cast.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -fno-rtti-data -fsyntax-only -verify +// RUN: %clang_cc1 %s -triple 
x86_64-pc-linux-gnu -fno-rtti-data -fsyntax-only -verify namespace std { struct type_info {}; From 247d02396524649a31bc45541f97457e32b8ef48 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 8 Sep 2020 11:14:36 +0700 Subject: [PATCH 034/161] [Test] Auto-generated checks for some IndVarSimplify tests --- .../IndVarSimplify/canonicalize-cmp.ll | 69 +++++++++++++++---- .../IndVarSimplify/lftr-multi-exit.ll | 36 +++++----- .../test/Transforms/IndVarSimplify/pr18223.ll | 20 +++++- 3 files changed, 93 insertions(+), 32 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll index 2b939767284a4c..7c4bad11a5ea58 100644 --- a/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll +++ b/llvm/test/Transforms/IndVarSimplify/canonicalize-cmp.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -indvars < %s | FileCheck %s ; Check that we replace signed comparisons between non-negative values with @@ -6,13 +7,35 @@ target datalayout = "n8:16:32:64" define i32 @test_01(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_01( -; CHECK-NOT: icmp slt -; CHECK: %cmp1 = icmp ult i32 %iv, 100 -; CHECK: %cmp2 = icmp ult i32 %iv, 100 -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[IV]], 100 +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry @@ -52,13 +75,35 @@ exit: } define i32 @test_02(i32 %a, i32 %b, i32* %p) { - ; CHECK-LABEL: @test_02( -; CHECK-NOT: icmp sgt -; CHECK: %cmp1 = icmp ugt i32 100, %iv -; CHECK: %cmp2 = icmp ugt i32 100, %iv -; CHECK-NOT: %cmp3 -; CHECK: %exitcond = icmp ne i32 %iv.next, 1000 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_ENTRY:%.*]] +; CHECK: loop.entry: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BE:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP1]], label [[B1:%.*]], label [[B2:%.*]] +; CHECK: b1: +; CHECK-NEXT: store i32 [[IV]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[MERGE:%.*]] +; CHECK: b2: +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[MERGE]] +; CHECK: merge: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 100, [[IV]] +; CHECK-NEXT: br i1 [[CMP2]], label [[B3:%.*]], label [[B4:%.*]] +; CHECK: b3: +; CHECK-NEXT: store i32 [[IV]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: b4: +; CHECK-NEXT: 
store i32 [[B:%.*]], i32* [[P]], align 4 +; CHECK-NEXT: br label [[LOOP_BE]] +; CHECK: loop.be: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_ENTRY]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 999 +; entry: br label %loop.entry diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll index 66951eda7a575c..7dfd4ebc001583 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-multi-exit.ll @@ -19,7 +19,7 @@ define void @analyzeable_early_exit(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -49,12 +49,12 @@ define void @unanalyzeable_early_exit() { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -89,12 +89,12 @@ define void @multiple_early_exits(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -137,7 +137,7 @@ define void @compound_early_exit(i32 %n, i32 %m) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -174,8 +174,8 @@ define void @unanalyzeable_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult 
i32 [[VOL]], 1000 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -210,7 +210,7 @@ define void @single_exit_no_latch(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: store i32 [[IV]], i32* @A +; CHECK-NEXT: store i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -243,11 +243,11 @@ define void @no_latch_exit(i32 %n, i32 %m) { ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[CONTINUE:%.*]], label [[EXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV]], [[M:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LATCH]], label [[EXIT]] ; CHECK: latch: -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: br label [[LOOP]] ; CHECK: exit: @@ -287,7 +287,7 @@ define void @combine_ivs(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 999 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -324,7 +324,7 @@ define void @combine_ivs2(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -362,7 +362,7 @@ define void @simplify_exit_test(i32 %n) { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[IV_NEXT]], 65 ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -396,13 +396,13 @@ define void @simplify_exit_test2(i32 %n) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A +; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, i32* @A, align 4 ; CHECK-NEXT: [[EARLYCND:%.*]] = icmp ne i32 [[VOL]], 0 ; CHECK-NEXT: br i1 [[EARLYCND]], label [[LATCH]], label [[EXIT:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[FX:%.*]] = udiv i32 [[IV]], 4 -; CHECK-NEXT: store volatile i32 [[IV]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV]], i32* @A, align 4 ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[FX]], 1024 ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: @@ -442,12 +442,12 @@ define void @nested(i32 %n) { ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: ; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV1_NEXT:%.*]], [[OUTER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV1]], i32* @A +; CHECK-NEXT: 
store volatile i32 [[IV1]], i32* @A, align 4 ; CHECK-NEXT: [[IV1_NEXT]] = add nuw nsw i32 [[IV1]], 1 ; CHECK-NEXT: br label [[INNER:%.*]] ; CHECK: inner: ; CHECK-NEXT: [[IV2:%.*]] = phi i32 [ 0, [[OUTER]] ], [ [[IV2_NEXT:%.*]], [[INNER_LATCH:%.*]] ] -; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A +; CHECK-NEXT: store volatile i32 [[IV2]], i32* @A, align 4 ; CHECK-NEXT: [[IV2_NEXT]] = add nuw nsw i32 [[IV2]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[IV2]], 20 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER_LATCH]], label [[EXIT_LOOPEXIT:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/pr18223.ll b/llvm/test/Transforms/IndVarSimplify/pr18223.ll index f922aa424a17e7..da620c80621989 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr18223.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr18223.ll @@ -1,12 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -indvars -S < %s | FileCheck %s ; indvars should transform the phi node pair from the for-loop -; CHECK-LABEL: @main( -; CHECK: ret = phi i32 [ 0, %entry ], [ 0, {{.*}} ] @c = common global i32 0, align 4 define i32 @main() #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @c, align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[RET:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 0, [[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[RET]] +; entry: %0 = load i32, i32* @c, align 4 %tobool = icmp eq i32 %0, 0 From 79651265b2e08e105f3d66d5f75bc9f5fa803e45 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 27 Aug 2020 20:34:07 -0500 Subject: [PATCH 035/161] [Attributor][FIX] Properly return changed if the IR was modified Deleting or replacing anything is certainly a modification. This caused a later assertion in IPSCCP when compiling 400.perlbench with the new PM. I'm not sure how to test this. 
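The invariant being restored can be shown with a small self-contained sketch (toy types, not the real Attributor interfaces): whenever any worklist of pending IR modifications is non-empty, cleanup must report CHANGED so that later passes do not reuse stale analyses.

  #include <cassert>
  #include <vector>

  // Toy stand-ins; the real Attributor keeps several such worklists
  // (ToBeChangedUses, ToBeDeletedInsts, DeadInsts, ...).
  enum class ChangeStatus { UNCHANGED, CHANGED };

  // Combining rule: CHANGED is sticky.
  static ChangeStatus operator|(ChangeStatus L, ChangeStatus R) {
    return L == ChangeStatus::CHANGED ? L : R;
  }

  struct PendingIRChanges {
    std::vector<int> ToBeChangedUses;  // ints stand in for IR units
    std::vector<int> ToBeDeletedInsts;
    std::vector<int> DeadInsts;
  };

  // If any worklist that will mutate the IR is non-empty, cleanup must report
  // CHANGED; otherwise a later pass (here: IPSCCP under the new PM) may trust
  // stale results and assert.
  static ChangeStatus cleanupIR(const PendingIRChanges &P) {
    ChangeStatus Result = ChangeStatus::UNCHANGED;
    auto MarkIfNonEmpty = [&Result](const std::vector<int> &WorkList) {
      if (!WorkList.empty())
        Result = Result | ChangeStatus::CHANGED;
    };
    MarkIfNonEmpty(P.ToBeChangedUses);
    MarkIfNonEmpty(P.ToBeDeletedInsts);
    MarkIfNonEmpty(P.DeadInsts);
    return Result;
  }

  int main() {
    PendingIRChanges P;
    assert(cleanupIR(P) == ChangeStatus::UNCHANGED);
    P.DeadInsts.push_back(1); // a single pending deletion is a modification
    assert(cleanupIR(P) == ChangeStatus::CHANGED);
    return 0;
  }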
--- llvm/lib/Transforms/IPO/Attributor.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index ea285b51982c13..f020c4aaf1dfd7 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1306,9 +1306,27 @@ ChangeStatus Attributor::cleanupIR() { CGUpdater.removeFunction(*Fn); } + if (!ToBeChangedUses.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!ToBeChangedToUnreachableInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + if (!ToBeDeletedFunctions.empty()) ManifestChange = ChangeStatus::CHANGED; + if (!ToBeDeletedBlocks.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!ToBeDeletedInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!InvokeWithDeadSuccessor.empty()) + ManifestChange = ChangeStatus::CHANGED; + + if (!DeadInsts.empty()) + ManifestChange = ChangeStatus::CHANGED; + NumFnDeleted += ToBeDeletedFunctions.size(); LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted From ff70c25d76561d0789743fa9f718dcd520199a7c Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 3 Sep 2020 11:08:39 -0500 Subject: [PATCH 036/161] [Attributor][NFC] Expand `auto` types (clang-fix-it) --- llvm/lib/Transforms/IPO/Attributor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index f020c4aaf1dfd7..d5c33f08827d24 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1449,7 +1449,7 @@ static void createShallowWrapper(Function &F) { BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper); SmallVector Args; - auto FArgIt = F.arg_begin(); + Argument *FArgIt = F.arg_begin(); for (Argument &Arg : Wrapper->args()) { Args.push_back(&Arg); Arg.setName((FArgIt++)->getName()); @@ -1773,8 +1773,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures( assert(Success && "Assumed call site replacement to succeed!"); // Rewire the arguments. - auto OldFnArgIt = OldFn->arg_begin(); - auto NewFnArgIt = NewFn->arg_begin(); + Argument *OldFnArgIt = OldFn->arg_begin(); + Argument *NewFnArgIt = NewFn->arg_begin(); for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum, ++OldFnArgIt) { if (const std::unique_ptr &ARI = From 8637acac5a3f4688114290b524eb5154a0bcdbdf Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 5 Sep 2020 13:26:20 -0500 Subject: [PATCH 037/161] [Attributor][NFC] Clang tidy: no else after continue --- llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b76e83def6e803..0fa5ad92c299e1 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1141,11 +1141,13 @@ ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) { RVState RVS({NewRVsMap, Unused, RetValAAIt.second}); VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB); continue; - } else if (isa(RetVal)) { + } + if (isa(RetVal)) { // Call sites are resolved by the callee attribute over time, no need to // do anything for us. continue; - } else if (isa(RetVal)) { + } + if (isa(RetVal)) { // Constants are valid everywhere, we can simply take them. 
NewRVsMap[RetVal].insert(RIs.begin(), RIs.end()); continue; From e6208849c8d63690ac3489813eb13196df7ed8dc Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 3 Sep 2020 16:13:28 -0500 Subject: [PATCH 038/161] [Attributor][NFC] Change variable spelling --- llvm/lib/Transforms/IPO/Attributor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index d5c33f08827d24..ac9b48a5376372 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -132,11 +132,11 @@ static cl::opt PrintDependencies("attributor-print-dep", cl::Hidden, /// Logic operators for the change status enum class. /// ///{ -ChangeStatus llvm::operator|(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::CHANGED ? l : r; +ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::CHANGED ? L : R; } -ChangeStatus llvm::operator&(ChangeStatus l, ChangeStatus r) { - return l == ChangeStatus::UNCHANGED ? l : r; +ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) { + return L == ChangeStatus::UNCHANGED ? L : R; } ///} From 53e4ef7fc25903430436ce456909d97aaa0fd6b2 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Thu, 3 Sep 2020 23:42:33 -0500 Subject: [PATCH 039/161] [Attributor][NFC] Cleanup internalize test case One run line was different and probably introduced for the manually added function attribute & name checks. We can do this with the script and a check prefix used for the other run lines as well. --- .../test/Transforms/Attributor/internalize.ll | 71 +++++++------------ 1 file changed, 24 insertions(+), 47 deletions(-) diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 8a244b5c998c39..25f16474e83401 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -12,16 +12,14 @@ ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED ; RUN: opt -attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED -; RUN: opt -attributor -attributor-cgscc -disable-inlining -attributor-allow-deep-wrappers -S < %s | FileCheck %s --check-prefix=DWRAPPER ; TEST 1: This function is of linkage `linkonce`, we cannot 
internalize this ; function and use information derived from it ; -; DWRAPPER-NOT: Function Attrs -; DWRAPPER-NOT: inner1.internalized +; CHECK-NOT: inner1.internalized define linkonce i32 @inner1(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner1 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -34,11 +32,10 @@ entry: ; TEST 2: This function is of linkage `weak`, we cannot internalize this function and ; use information derived from it ; -; DWRAPPER-NOT: Function Attrs -; DWRAPPER-NOT: inner2.internalized +; CHECK-NOT: inner2.internalized define weak i32 @inner2(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner2 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -51,17 +48,12 @@ entry: ; TEST 3: This function is of linkage `linkonce_odr`, which can be internalized using the ; deep wrapper, and the IP information derived from this function can be used ; -; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; DWRAPPER: define private i32 @inner3.internalized(i32 %a, i32 %b) -; DWRAPPER-NEXT: entry: -; DWRAPPER-NEXT: %c = add i32 %a, %b -; DWRAPPER-NEXT: ret i32 %c define linkonce_odr i32 @inner3(i32 %a, i32 %b) { -; CHECK-LABEL: define {{[^@]+}}@inner3 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; CHECK-NEXT: ret i32 [[C]] +; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner3 +; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK_DISABLED-NEXT: entry: +; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK_DISABLED-NEXT: ret i32 [[C]] ; entry: %c = add i32 %a, %b @@ -71,17 +63,12 @@ entry: ; TEST 4: This function is of linkage `weak_odr`, which can be internalized using the deep ; wrapper ; -; DWRAPPER: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; DWRAPPER: define private i32 @inner4.internalized(i32 %a, i32 %b) -; DWRAPPER-NEXT: entry: -; DWRAPPER-NEXT: %c = add i32 %a, %b -; DWRAPPER-NEXT: ret i32 %c define weak_odr i32 @inner4(i32 %a, i32 %b) { -; CHECK-LABEL: define {{[^@]+}}@inner4 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] -; CHECK-NEXT: ret i32 [[C]] +; CHECK_DISABLED-LABEL: define {{[^@]+}}@inner4 +; CHECK_DISABLED-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK_DISABLED-NEXT: entry: +; CHECK_DISABLED-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] +; CHECK_DISABLED-NEXT: ret i32 [[C]] ; entry: %c = add i32 %a, %b @@ -91,10 +78,10 @@ entry: ; TEST 5: This function has linkage `linkonce_odr` but is never called (num of use = 0), so there ; is no need to internalize this ; -; DWRAPPER-NOT: inner5.internalized +; CHECK-NOT: inner5.internalized define linkonce_odr i32 @inner5(i32 %a, i32 %b) { ; CHECK-LABEL: define {{[^@]+}}@inner5 -; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] ; CHECK-NEXT: ret i32 [[C]] @@ -109,16 +96,8 @@ entry: ; Since the inner3 is internalized, the use of the original function should be replaced by the ; copied one ; -; DWRAPPER-NOT: call i32 @inner1.internalized -; DWRAPPER: call i32 @inner1 -; DWRAPPER-NOT: call i32 @inner2.internalized -; DWRAPPER: 
call i32 @inner2 -; DWRAPPER-NOT: call i32 @inner3 -; DWRAPPER: call i32 @inner3.internalized -; DWRAPPER-NOT: call i32 @inner4 -; DWRAPPER: call i32 @inner4.internalized define i32 @outer1() { -; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_DISABLED-LABEL: define {{[^@]+}}@outer1() { ; CHECK_DISABLED-NEXT: entry: ; CHECK_DISABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2) ; CHECK_DISABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2) @@ -126,7 +105,7 @@ define i32 @outer1() { ; CHECK_DISABLED-NEXT: [[RET4:%.*]] = call i32 @inner4(i32 [[RET3]], i32 [[RET3]]) ; CHECK_DISABLED-NEXT: ret i32 [[RET4]] ; -; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() +; CHECK_ENABLED-LABEL: define {{[^@]+}}@outer1() { ; CHECK_ENABLED-NEXT: entry: ; CHECK_ENABLED-NEXT: [[RET1:%.*]] = call i32 @inner1(i32 noundef 1, i32 noundef 2) ; CHECK_ENABLED-NEXT: [[RET2:%.*]] = call i32 @inner2(i32 noundef 1, i32 noundef 2) @@ -145,28 +124,26 @@ entry: define linkonce_odr void @unused_arg(i8) { ; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg -; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]]) +; CHECK_DISABLED-SAME: (i8 [[TMP0:%.*]]) { ; CHECK_DISABLED-NEXT: unreachable ; unreachable } define void @unused_arg_caller() { -; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; CHECK_DISABLED-LABEL: define {{[^@]+}}@unused_arg_caller() { ; CHECK_DISABLED-NEXT: call void @unused_arg(i8 noundef 0) ; CHECK_DISABLED-NEXT: ret void ; ; IS__TUNIT_____ENABLED: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller +; IS__TUNIT_____ENABLED-SAME: () [[ATTR1:#.*]] { ; IS__TUNIT_____ENABLED-NEXT: unreachable ; ; IS__CGSCC_____ENABLED: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn -; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller() +; IS__CGSCC_____ENABLED-LABEL: define {{[^@]+}}@unused_arg_caller +; IS__CGSCC_____ENABLED-SAME: () [[ATTR2:#.*]] { ; IS__CGSCC_____ENABLED-NEXT: unreachable -; -; DWRAPPER: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn -; DWRAPPER-LABEL: define {{[^@]+}}@unused_arg_caller() -; DWRAPPER-NEXT: unreachable ; call void @unused_arg(i8 0) ret void From 711bf7dcf9546fefe18d32a5772d48e7b5166f08 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 18 Aug 2020 15:32:21 -0500 Subject: [PATCH 040/161] [Attributor][FIX] Don't crash on internalizing linkonce_odr hidden functions The CloneFunctionInto has implicit requirements with regards to the linkage and visibility of the function. We now update these after we did the CloneFunctionInto on the copy with the same linkage and visibility as the original. 
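The corrected order of operations looks roughly like the sketch below (argument mapping kept minimal, error handling omitted; internalizeCopy is an illustrative name, and the CloneFunctionInto signature is the one used in this tree): clone while the copy still carries the original linkage and visibility, and only afterwards make it private.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Function.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  using namespace llvm;

  // Make a private copy of F that analyses may specialize freely.
  static Function *internalizeCopy(Function &F) {
    // 1. Create the copy with the *original* linkage; CloneFunctionInto has
    //    implicit expectations tied to the source's linkage and visibility
    //    (e.g. for linkonce_odr hidden functions), so do not change them yet.
    Function *Copied = Function::Create(F.getFunctionType(), F.getLinkage(),
                                        F.getAddressSpace(),
                                        F.getName() + ".internalized",
                                        F.getParent());

    ValueToValueMapTy VMap;
    auto *NewArgIt = Copied->arg_begin();
    for (Argument &Arg : F.args()) {
      NewArgIt->setName(Arg.getName());
      VMap[&Arg] = &*NewArgIt++;
    }

    SmallVector<ReturnInst *, 8> Returns;
    CloneFunctionInto(Copied, &F, VMap, /*ModuleLevelChanges=*/false, Returns);

    // 2. Only now downgrade the copy to private linkage and default
    //    visibility, as the patch does.
    Copied->setVisibility(GlobalValue::DefaultVisibility);
    Copied->setLinkage(GlobalValue::PrivateLinkage);
    return Copied;
  }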
--- llvm/lib/Transforms/IPO/Attributor.cpp | 10 +++++++--- llvm/test/Transforms/Attributor/internalize.ll | 11 +++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index ac9b48a5376372..32420e847129f1 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1481,9 +1481,8 @@ static Function *internalizeFunction(Function &F) { FunctionType *FnTy = F.getFunctionType(); // create a copy of the current function - Function *Copied = - Function::Create(FnTy, GlobalValue::PrivateLinkage, F.getAddressSpace(), - F.getName() + ".internalized"); + Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), + F.getName() + ".internalized"); ValueToValueMapTy VMap; auto *NewFArgIt = Copied->arg_begin(); for (auto &Arg : F.args()) { @@ -1496,6 +1495,11 @@ static Function *internalizeFunction(Function &F) { // Copy the body of the original function to the new one CloneFunctionInto(Copied, &F, VMap, /* ModuleLevelChanges */ false, Returns); + // Set the linakage and visibility late as CloneFunctionInto has some implicit + // requirements. + Copied->setVisibility(GlobalValue::DefaultVisibility); + Copied->setLinkage(GlobalValue::PrivateLinkage); + // Copy metadata SmallVector, 1> MDs; F.getAllMetadata(MDs); diff --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll index 25f16474e83401..3e485382e9be0f 100644 --- a/llvm/test/Transforms/Attributor/internalize.ll +++ b/llvm/test/Transforms/Attributor/internalize.ll @@ -148,3 +148,14 @@ define void @unused_arg_caller() { call void @unused_arg(i8 0) ret void } + +; Don't crash on linkonce_odr hidden functions +define linkonce_odr hidden void @__clang_call_terminate() { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@__clang_call_terminate() { +; CHECK_DISABLED-NEXT: call void @__clang_call_terminate() +; CHECK_DISABLED-NEXT: unreachable +; + call void @__clang_call_terminate() + unreachable +} + From e59d9df774ed7d94455b224f0e3f6eaeae707259 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 7 Sep 2020 21:44:26 -0700 Subject: [PATCH 041/161] [ELF] --symbol-ordering-file: optimize a loop --- lld/ELF/Writer.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index b26817b66e2711..5ef37e9ecb895f 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1346,9 +1346,11 @@ static DenseMap buildSectionOrder() { addSym(*sym); for (InputFile *file : objectFiles) - for (Symbol *sym : file->getSymbols()) - if (sym->isLocal()) - addSym(*sym); + for (Symbol *sym : file->getSymbols()) { + if (!sym->isLocal()) + break; + addSym(*sym); + } if (config->warnSymbolOrdering) for (auto orderEntry : symbolOrder) From 78071fb52456f5da9d044588e58a946c0ad96830 Mon Sep 17 00:00:00 2001 From: Andrew Wei Date: Tue, 8 Sep 2020 13:14:53 +0800 Subject: [PATCH 042/161] [LSR] Canonicalize a formula before insert it into the list In GenerateConstantOffsetsImpl, we may generate non canonical Formula if BaseRegs of that Formula is updated and includes a recurrent expr reg related with current loop while its ScaledReg is not. 
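Roughly, LSR's canonical form requires that when any register in a formula is an add-recurrence of the current loop, the ScaledReg slot holds such a register. Formula::isCanonical has more cases than this (it also looks at Scale and the number of base registers), but the toy below keeps only the part relevant to this fix and uses stand-in types rather than SCEV expressions:

  #include <algorithm>
  #include <cassert>
  #include <optional>
  #include <utility>
  #include <vector>

  // Stand-in for an LSR register: we only track whether it is an
  // add-recurrence of (i.e. varies with) the loop being optimized.
  struct Reg {
    bool LoopRecurrent = false;
  };

  struct Formula {
    std::vector<Reg> BaseRegs;
    std::optional<Reg> ScaledReg;
    int Scale = 0;

    // Simplified canonical-form check: if any register in the formula recurs
    // in the current loop, the ScaledReg slot must hold such a register.
    bool isCanonical() const {
      bool AnyRecurrentBase =
          std::any_of(BaseRegs.begin(), BaseRegs.end(),
                      [](const Reg &R) { return R.LoopRecurrent; });
      if (!AnyRecurrentBase)
        return true;
      return ScaledReg && ScaledReg->LoopRecurrent;
    }

    // Repair step in the spirit of Formula::canonicalize(*L): promote or swap
    // a loop-recurrent base register into the ScaledReg slot.
    void canonicalize() {
      if (isCanonical())
        return;
      auto It = std::find_if(BaseRegs.begin(), BaseRegs.end(),
                             [](const Reg &R) { return R.LoopRecurrent; });
      assert(It != BaseRegs.end() && "only recurrent base regs break the form");
      if (!ScaledReg) {
        ScaledReg = *It;
        Scale = 1;
        BaseRegs.erase(It);
      } else {
        std::swap(*ScaledReg, *It);
      }
    }
  };

  int main() {
    // The situation guarded against: a base register is rewritten into one
    // that recurs in the current loop while ScaledReg does not.
    Formula F;
    F.ScaledReg = Reg{/*LoopRecurrent=*/false};
    F.Scale = 1;
    F.BaseRegs.push_back(Reg{/*LoopRecurrent=*/true});
    assert(!F.isCanonical());
    F.canonicalize(); // what the fix adds before InsertFormula
    assert(F.isCanonical());
    return 0;
  }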
Patched by: mdchen Reviewed By: qcolombet Differential Revision: https://reviews.llvm.org/D86939 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 8 +- .../LoopStrengthReduce/AArch64/pr47329.ll | 299 ++++++++++++++++++ 2 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index c3e46c1fadef31..47329fa1f043e2 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -3834,10 +3834,14 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.BaseOffset = (uint64_t)F.BaseOffset + Imm; if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) return; - if (IsScaledReg) + if (IsScaledReg) { F.ScaledReg = G; - else + } else { F.BaseRegs[Idx] = G; + // We may generate non canonical Formula if G is a recurrent expr reg + // related with current loop while F.ScaledReg is not. + F.canonicalize(*L); + } (void)InsertFormula(LU, LUIdx, F); } diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll new file mode 100644 index 00000000000000..bd2d6b4b0b4cac --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr47329.ll @@ -0,0 +1,299 @@ +; RUN: opt < %s -loop-reduce +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@d = internal unnamed_addr global i32** null, align 8 + +define dso_local i32 @main() local_unnamed_addr { +entry: + %.pre.pre = load i32**, i32*** @d, align 8 + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %i = phi i32** [ %.pre.pre, %entry ], [ %incdec.ptr, %for.body9 ] + %incdec.ptr = getelementptr inbounds i32*, i32** %i, i64 -1 + br i1 undef, label %for.body9, label %for.inc + +for.inc: ; preds = %for.body9 + br label %for.body9.118 + +for.body9.1: ; preds = %for.inc.547, %for.body9.1 + %i1 = phi i32** [ %incdec.ptr.1, %for.body9.1 ], [ %incdec.ptr.542, %for.inc.547 ] + %incdec.ptr.1 = getelementptr inbounds i32*, i32** %i1, i64 -1 + br i1 undef, label %for.body9.1, label %for.inc.1 + +for.inc.1: ; preds = %for.body9.1 + br label %for.body9.1.1 + +for.body9.2: ; preds = %for.inc.1.5, %for.body9.2 + %i2 = phi i32** [ %incdec.ptr.2, %for.body9.2 ], [ %incdec.ptr.1.5, %for.inc.1.5 ] + %incdec.ptr.2 = getelementptr inbounds i32*, i32** %i2, i64 -1 + br i1 undef, label %for.body9.2, label %for.inc.2 + +for.inc.2: ; preds = %for.body9.2 + br label %for.body9.2.1 + +for.body9.3: ; preds = %for.inc.2.5, %for.body9.3 + %i3 = phi i32** [ %incdec.ptr.3, %for.body9.3 ], [ %incdec.ptr.2.5, %for.inc.2.5 ] + %incdec.ptr.3 = getelementptr inbounds i32*, i32** %i3, i64 -1 + br i1 undef, label %for.body9.3, label %for.inc.3 + +for.inc.3: ; preds = %for.body9.3 + br label %for.body9.3.1 + +for.body9.4: ; preds = %for.inc.3.5, %for.body9.4 + %i4 = phi i32** [ %incdec.ptr.4, %for.body9.4 ], [ %incdec.ptr.3.5, %for.inc.3.5 ] + %incdec.ptr.4 = getelementptr inbounds i32*, i32** %i4, i64 -1 + br i1 undef, label %for.body9.4, label %for.inc.4 + +for.inc.4: ; preds = %for.body9.4 + br label %for.body9.4.1 + +for.body9.5: ; preds = %for.inc.4.5, %for.body9.5 + %i5 = phi i32** [ %incdec.ptr.5, %for.body9.5 ], [ %incdec.ptr.4.5, %for.inc.4.5 ] + %incdec.ptr.5 = getelementptr inbounds i32*, i32** %i5, i64 -1 + br i1 undef, label %for.body9.5, label %for.inc.5 + +for.inc.5: ; 
preds = %for.body9.5 + br label %for.body9.5.1 + +for.body9.5.1: ; preds = %for.body9.5.1, %for.inc.5 + %i6 = phi i32** [ %incdec.ptr.5.1, %for.body9.5.1 ], [ %incdec.ptr.5, %for.inc.5 ] + %incdec.ptr.5.1 = getelementptr inbounds i32*, i32** %i6, i64 -1 + br i1 undef, label %for.body9.5.1, label %for.inc.5.1 + +for.inc.5.1: ; preds = %for.body9.5.1 + br label %for.body9.5.2 + +for.body9.5.2: ; preds = %for.body9.5.2, %for.inc.5.1 + %i7 = phi i32** [ %incdec.ptr.5.2, %for.body9.5.2 ], [ %incdec.ptr.5.1, %for.inc.5.1 ] + %incdec.ptr.5.2 = getelementptr inbounds i32*, i32** %i7, i64 -1 + br i1 undef, label %for.body9.5.2, label %for.inc.5.2 + +for.inc.5.2: ; preds = %for.body9.5.2 + br label %for.body9.5.3 + +for.body9.5.3: ; preds = %for.body9.5.3, %for.inc.5.2 + %i8 = phi i32** [ %incdec.ptr.5.3, %for.body9.5.3 ], [ %incdec.ptr.5.2, %for.inc.5.2 ] + %incdec.ptr.5.3 = getelementptr inbounds i32*, i32** %i8, i64 -1 + br i1 undef, label %for.body9.5.3, label %for.inc.5.3 + +for.inc.5.3: ; preds = %for.body9.5.3 + br label %for.body9.5.4 + +for.body9.5.4: ; preds = %for.body9.5.4, %for.inc.5.3 + %i9 = phi i32** [ %incdec.ptr.5.4, %for.body9.5.4 ], [ %incdec.ptr.5.3, %for.inc.5.3 ] + %incdec.ptr.5.4 = getelementptr inbounds i32*, i32** %i9, i64 -1 + br i1 undef, label %for.body9.5.4, label %for.inc.5.4 + +for.inc.5.4: ; preds = %for.body9.5.4 + br label %for.body9.5.5 + +for.body9.5.5: ; preds = %for.body9.5.5, %for.inc.5.4 + %i10 = phi i32** [ undef, %for.body9.5.5 ], [ %incdec.ptr.5.4, %for.inc.5.4 ] + %i11 = bitcast i32** %i10 to i64* + %i12 = load i64, i64* %i11, align 8 + br label %for.body9.5.5 + +for.body9.4.1: ; preds = %for.body9.4.1, %for.inc.4 + %i13 = phi i32** [ %incdec.ptr.4.1, %for.body9.4.1 ], [ %incdec.ptr.4, %for.inc.4 ] + %incdec.ptr.4.1 = getelementptr inbounds i32*, i32** %i13, i64 -1 + br i1 undef, label %for.body9.4.1, label %for.inc.4.1 + +for.inc.4.1: ; preds = %for.body9.4.1 + br label %for.body9.4.2 + +for.body9.4.2: ; preds = %for.body9.4.2, %for.inc.4.1 + %i14 = phi i32** [ %incdec.ptr.4.2, %for.body9.4.2 ], [ %incdec.ptr.4.1, %for.inc.4.1 ] + %incdec.ptr.4.2 = getelementptr inbounds i32*, i32** %i14, i64 -1 + br i1 undef, label %for.body9.4.2, label %for.inc.4.2 + +for.inc.4.2: ; preds = %for.body9.4.2 + br label %for.body9.4.3 + +for.body9.4.3: ; preds = %for.body9.4.3, %for.inc.4.2 + %i15 = phi i32** [ %incdec.ptr.4.3, %for.body9.4.3 ], [ %incdec.ptr.4.2, %for.inc.4.2 ] + %incdec.ptr.4.3 = getelementptr inbounds i32*, i32** %i15, i64 -1 + br i1 undef, label %for.body9.4.3, label %for.inc.4.3 + +for.inc.4.3: ; preds = %for.body9.4.3 + br label %for.body9.4.4 + +for.body9.4.4: ; preds = %for.body9.4.4, %for.inc.4.3 + %i16 = phi i32** [ %incdec.ptr.4.4, %for.body9.4.4 ], [ %incdec.ptr.4.3, %for.inc.4.3 ] + %incdec.ptr.4.4 = getelementptr inbounds i32*, i32** %i16, i64 -1 + br i1 undef, label %for.body9.4.4, label %for.inc.4.4 + +for.inc.4.4: ; preds = %for.body9.4.4 + br label %for.body9.4.5 + +for.body9.4.5: ; preds = %for.body9.4.5, %for.inc.4.4 + %i17 = phi i32** [ %incdec.ptr.4.5, %for.body9.4.5 ], [ %incdec.ptr.4.4, %for.inc.4.4 ] + %incdec.ptr.4.5 = getelementptr inbounds i32*, i32** %i17, i64 -1 + br i1 undef, label %for.body9.4.5, label %for.inc.4.5 + +for.inc.4.5: ; preds = %for.body9.4.5 + br label %for.body9.5 + +for.body9.3.1: ; preds = %for.body9.3.1, %for.inc.3 + %i18 = phi i32** [ %incdec.ptr.3.1, %for.body9.3.1 ], [ %incdec.ptr.3, %for.inc.3 ] + %incdec.ptr.3.1 = getelementptr inbounds i32*, i32** %i18, i64 -1 + br i1 undef, label %for.body9.3.1, 
label %for.inc.3.1 + +for.inc.3.1: ; preds = %for.body9.3.1 + br label %for.body9.3.2 + +for.body9.3.2: ; preds = %for.body9.3.2, %for.inc.3.1 + %i19 = phi i32** [ %incdec.ptr.3.2, %for.body9.3.2 ], [ %incdec.ptr.3.1, %for.inc.3.1 ] + %incdec.ptr.3.2 = getelementptr inbounds i32*, i32** %i19, i64 -1 + br i1 undef, label %for.body9.3.2, label %for.inc.3.2 + +for.inc.3.2: ; preds = %for.body9.3.2 + br label %for.body9.3.3 + +for.body9.3.3: ; preds = %for.body9.3.3, %for.inc.3.2 + %i20 = phi i32** [ %incdec.ptr.3.3, %for.body9.3.3 ], [ %incdec.ptr.3.2, %for.inc.3.2 ] + %incdec.ptr.3.3 = getelementptr inbounds i32*, i32** %i20, i64 -1 + br i1 undef, label %for.body9.3.3, label %for.inc.3.3 + +for.inc.3.3: ; preds = %for.body9.3.3 + br label %for.body9.3.4 + +for.body9.3.4: ; preds = %for.body9.3.4, %for.inc.3.3 + %i21 = phi i32** [ %incdec.ptr.3.4, %for.body9.3.4 ], [ %incdec.ptr.3.3, %for.inc.3.3 ] + %incdec.ptr.3.4 = getelementptr inbounds i32*, i32** %i21, i64 -1 + br i1 undef, label %for.body9.3.4, label %for.inc.3.4 + +for.inc.3.4: ; preds = %for.body9.3.4 + br label %for.body9.3.5 + +for.body9.3.5: ; preds = %for.body9.3.5, %for.inc.3.4 + %i22 = phi i32** [ %incdec.ptr.3.5, %for.body9.3.5 ], [ %incdec.ptr.3.4, %for.inc.3.4 ] + %incdec.ptr.3.5 = getelementptr inbounds i32*, i32** %i22, i64 -1 + br i1 undef, label %for.body9.3.5, label %for.inc.3.5 + +for.inc.3.5: ; preds = %for.body9.3.5 + br label %for.body9.4 + +for.body9.2.1: ; preds = %for.body9.2.1, %for.inc.2 + %i23 = phi i32** [ %incdec.ptr.2.1, %for.body9.2.1 ], [ %incdec.ptr.2, %for.inc.2 ] + %incdec.ptr.2.1 = getelementptr inbounds i32*, i32** %i23, i64 -1 + br i1 undef, label %for.body9.2.1, label %for.inc.2.1 + +for.inc.2.1: ; preds = %for.body9.2.1 + br label %for.body9.2.2 + +for.body9.2.2: ; preds = %for.body9.2.2, %for.inc.2.1 + %i24 = phi i32** [ %incdec.ptr.2.2, %for.body9.2.2 ], [ %incdec.ptr.2.1, %for.inc.2.1 ] + %incdec.ptr.2.2 = getelementptr inbounds i32*, i32** %i24, i64 -1 + br i1 undef, label %for.body9.2.2, label %for.inc.2.2 + +for.inc.2.2: ; preds = %for.body9.2.2 + br label %for.body9.2.3 + +for.body9.2.3: ; preds = %for.body9.2.3, %for.inc.2.2 + %i25 = phi i32** [ %incdec.ptr.2.3, %for.body9.2.3 ], [ %incdec.ptr.2.2, %for.inc.2.2 ] + %incdec.ptr.2.3 = getelementptr inbounds i32*, i32** %i25, i64 -1 + br i1 undef, label %for.body9.2.3, label %for.inc.2.3 + +for.inc.2.3: ; preds = %for.body9.2.3 + br label %for.body9.2.4 + +for.body9.2.4: ; preds = %for.body9.2.4, %for.inc.2.3 + %i26 = phi i32** [ %incdec.ptr.2.4, %for.body9.2.4 ], [ %incdec.ptr.2.3, %for.inc.2.3 ] + %incdec.ptr.2.4 = getelementptr inbounds i32*, i32** %i26, i64 -1 + br i1 undef, label %for.body9.2.4, label %for.inc.2.4 + +for.inc.2.4: ; preds = %for.body9.2.4 + br label %for.body9.2.5 + +for.body9.2.5: ; preds = %for.body9.2.5, %for.inc.2.4 + %i27 = phi i32** [ %incdec.ptr.2.5, %for.body9.2.5 ], [ %incdec.ptr.2.4, %for.inc.2.4 ] + %incdec.ptr.2.5 = getelementptr inbounds i32*, i32** %i27, i64 -1 + br i1 undef, label %for.body9.2.5, label %for.inc.2.5 + +for.inc.2.5: ; preds = %for.body9.2.5 + br label %for.body9.3 + +for.body9.1.1: ; preds = %for.body9.1.1, %for.inc.1 + %i28 = phi i32** [ %incdec.ptr.1.1, %for.body9.1.1 ], [ %incdec.ptr.1, %for.inc.1 ] + %incdec.ptr.1.1 = getelementptr inbounds i32*, i32** %i28, i64 -1 + br i1 undef, label %for.body9.1.1, label %for.inc.1.1 + +for.inc.1.1: ; preds = %for.body9.1.1 + br label %for.body9.1.2 + +for.body9.1.2: ; preds = %for.body9.1.2, %for.inc.1.1 + %i29 = phi i32** [ %incdec.ptr.1.2, 
%for.body9.1.2 ], [ %incdec.ptr.1.1, %for.inc.1.1 ] + %incdec.ptr.1.2 = getelementptr inbounds i32*, i32** %i29, i64 -1 + br i1 undef, label %for.body9.1.2, label %for.inc.1.2 + +for.inc.1.2: ; preds = %for.body9.1.2 + br label %for.body9.1.3 + +for.body9.1.3: ; preds = %for.body9.1.3, %for.inc.1.2 + %i30 = phi i32** [ %incdec.ptr.1.3, %for.body9.1.3 ], [ %incdec.ptr.1.2, %for.inc.1.2 ] + %incdec.ptr.1.3 = getelementptr inbounds i32*, i32** %i30, i64 -1 + br i1 undef, label %for.body9.1.3, label %for.inc.1.3 + +for.inc.1.3: ; preds = %for.body9.1.3 + br label %for.body9.1.4 + +for.body9.1.4: ; preds = %for.body9.1.4, %for.inc.1.3 + %i31 = phi i32** [ %incdec.ptr.1.4, %for.body9.1.4 ], [ %incdec.ptr.1.3, %for.inc.1.3 ] + %incdec.ptr.1.4 = getelementptr inbounds i32*, i32** %i31, i64 -1 + br i1 undef, label %for.body9.1.4, label %for.inc.1.4 + +for.inc.1.4: ; preds = %for.body9.1.4 + br label %for.body9.1.5 + +for.body9.1.5: ; preds = %for.body9.1.5, %for.inc.1.4 + %i32 = phi i32** [ %incdec.ptr.1.5, %for.body9.1.5 ], [ %incdec.ptr.1.4, %for.inc.1.4 ] + %incdec.ptr.1.5 = getelementptr inbounds i32*, i32** %i32, i64 -1 + br i1 undef, label %for.body9.1.5, label %for.inc.1.5 + +for.inc.1.5: ; preds = %for.body9.1.5 + br label %for.body9.2 + +for.body9.118: ; preds = %for.body9.118, %for.inc + %i33 = phi i32** [ %incdec.ptr, %for.inc ], [ %incdec.ptr.114, %for.body9.118 ] + %incdec.ptr.114 = getelementptr inbounds i32*, i32** %i33, i64 -1 + br i1 undef, label %for.body9.118, label %for.inc.119 + +for.inc.119: ; preds = %for.body9.118 + br label %for.body9.225 + +for.body9.225: ; preds = %for.body9.225, %for.inc.119 + %i34 = phi i32** [ %incdec.ptr.114, %for.inc.119 ], [ %incdec.ptr.221, %for.body9.225 ] + %incdec.ptr.221 = getelementptr inbounds i32*, i32** %i34, i64 -1 + %i35 = bitcast i32** %i34 to i64* + %i36 = load i64, i64* %i35, align 8 + br i1 undef, label %for.body9.225, label %for.inc.226 + +for.inc.226: ; preds = %for.body9.225 + br label %for.body9.332 + +for.body9.332: ; preds = %for.body9.332, %for.inc.226 + %i37 = phi i32** [ %incdec.ptr.221, %for.inc.226 ], [ %incdec.ptr.328, %for.body9.332 ] + %incdec.ptr.328 = getelementptr inbounds i32*, i32** %i37, i64 -1 + br i1 undef, label %for.body9.332, label %for.inc.333 + +for.inc.333: ; preds = %for.body9.332 + br label %for.body9.439 + +for.body9.439: ; preds = %for.body9.439, %for.inc.333 + %i38 = phi i32** [ %incdec.ptr.328, %for.inc.333 ], [ %incdec.ptr.435, %for.body9.439 ] + %incdec.ptr.435 = getelementptr inbounds i32*, i32** %i38, i64 -1 + br i1 undef, label %for.body9.439, label %for.inc.440 + +for.inc.440: ; preds = %for.body9.439 + br label %for.body9.546 + +for.body9.546: ; preds = %for.body9.546, %for.inc.440 + %i39 = phi i32** [ %incdec.ptr.435, %for.inc.440 ], [ %incdec.ptr.542, %for.body9.546 ] + %incdec.ptr.542 = getelementptr inbounds i32*, i32** %i39, i64 -1 + br i1 undef, label %for.body9.546, label %for.inc.547 + +for.inc.547: ; preds = %for.body9.546 + br label %for.body9.1 +} From 28b9ace85f6871cdb48f1483314d8342e099b136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 8 Sep 2020 09:26:39 +0300 Subject: [PATCH 043/161] [clang] Remove a stray semicolon, fixing pedantic GCC warnings. NFC. 
--- clang/include/clang/AST/IgnoreExpr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h index 15d31f3af99546..0aeb547606a2b1 100644 --- a/clang/include/clang/AST/IgnoreExpr.h +++ b/clang/include/clang/AST/IgnoreExpr.h @@ -19,7 +19,7 @@ namespace clang { namespace detail { /// Given an expression E and functions Fn_1,...,Fn_n : Expr * -> Expr *, /// Return Fn_n(...(Fn_1(E))) -inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; }; +inline Expr *IgnoreExprNodesImpl(Expr *E) { return E; } template Expr *IgnoreExprNodesImpl(Expr *E, FnTy &&Fn, FnTys &&... Fns) { return IgnoreExprNodesImpl(Fn(E), std::forward(Fns)...); From ea795304ec073a63c3c5b4fd0c5579e667201dad Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Tue, 8 Sep 2020 08:05:47 +0200 Subject: [PATCH 044/161] [PowerPC] Add parentheses to silence gcc warning Without gcc 7.4 warns with ../lib/Target/PowerPC/PPCInstrInfo.cpp:2284:25: warning: suggest parentheses around '&&' within '||' [-Wparentheses] BaseOp1.isFI() && ~~~~~~~~~~~~~~~^~ "Only base registers and frame indices are supported."); ~ --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 2c4549899e0c30..9afc0308533ec4 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2280,9 +2280,8 @@ bool PPCInstrInfo::shouldClusterMemOps( assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); const MachineOperand &BaseOp1 = *BaseOps1.front(); const MachineOperand &BaseOp2 = *BaseOps2.front(); - assert(BaseOp1.isReg() || - BaseOp1.isFI() && - "Only base registers and frame indices are supported."); + assert((BaseOp1.isReg() || BaseOp1.isFI()) && + "Only base registers and frame indices are supported."); // The NumLoads means the number of loads that has been clustered. // Don't cluster memory op if there are already two ops clustered at least. From 8ee1419ab688ee2da2ac2cb0cf19db03f4c4742e Mon Sep 17 00:00:00 2001 From: Simon Wallis Date: Tue, 8 Sep 2020 08:04:52 +0100 Subject: [PATCH 045/161] [AARCH64][RegisterCoalescer] clang miscompiles zero-extension to long long Implement AArch64 variant of shouldCoalesce() to detect a known failing case and prevent the coalescing of a 32-bit copy into a 64-bit sign-extending load. Do not coalesce in the following case: COPY where source is bottom 32 bits of a 64-register, and destination is a 32-bit subregister of a 64-bit register, ie it causes the rest of the register to be implicitly set to zero. A mir test has been added. In the test case, the 32-bit copy implements a 32 to 64 bit zero extension and relies on the upper 32 bits being zeroed. Coalescing to the result of the 64-bit load meant overwriting the upper 32 bits incorrectly when the loaded byte was negative. 
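For readers without the .mir reproducer handy, a hypothetical C-level reduction of the pattern (function and variable names are invented; the committed test is the MIR file added below):

```cpp
#include <cstdio>

signed char c = -1;

unsigned long long bug_e(int i) {
  int t = c;               // becomes a sign-extending load (e.g. LDRSBX)
  c = (signed char)i;      // keep both the load and a store to 'c' live
  return (unsigned int)t;  // 32 -> 64 bit zero extension of the low half
}

int main() {
  // Expected output: ffffffff.  With the bad coalescing, the sign-extended
  // upper 32 bits of the load survive and ffffffffffffffff is printed.
  std::printf("%llx\n", bug_e(0));
  return 0;
}
```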
Reviewed By: john.brawn Differential Revision: https://reviews.llvm.org/D85956 --- .../Target/AArch64/AArch64RegisterInfo.cpp | 16 +++++++++ llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 6 ++++ .../CodeGen/AArch64/zext-reg-coalesce.mir | 33 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 2f1317d8f1ea88..b3694411966b56 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -734,3 +734,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister( return getBaseRegister(); return getFrameRegister(MF); } + +/// SrcRC and DstRC will be morphed into NewRC if this returns true +bool AArch64RegisterInfo::shouldCoalesce( + MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, + const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + if (MI->isCopy() && + ((DstRC->getID() == AArch64::GPR64RegClassID) || + (DstRC->getID() == AArch64::GPR64commonRegClassID)) && + MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg()) + // Do not coalesce in the case of a 32-bit subregister copy + // which implements a 32 to 64 bit zero extension + // which relies on the upper 32 bits being zeroed. + return false; + return true; +} diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index e3c8a77f433f84..d7580d7b683303 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -129,6 +129,12 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo { unsigned getLocalAddressRegister(const MachineFunction &MF) const; bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const; + + /// SrcRC and DstRC will be morphed into NewRC if this returns true + bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, + unsigned SubReg, const TargetRegisterClass *DstRC, + unsigned DstSubReg, const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir new file mode 100644 index 00000000000000..b31144b409fca7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir @@ -0,0 +1,33 @@ +# RUN: llc -mtriple=aarch64-arm-none-eabi -o - %s \ +# RUN: -run-pass simple-register-coalescing | FileCheck %s + +# In this test case, the 32-bit copy implements a 32 to 64 bit zero extension +# and relies on the upper 32 bits being zeroed. +# Coalescing to the result of the 64-bit load meant overwriting +# the upper 32 bits incorrectly when the loaded byte was negative. + +--- | + @c = local_unnamed_addr global i8 -1, align 4 + + define i64 @bug_e(i32 %i32) local_unnamed_addr { + ret i64 0 + } +... 
+--- +name: bug_e +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + + %1:gpr32 = COPY $w0 + %2:gpr64common = ADRP target-flags(aarch64-page) @c + %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load 1 from @c, align 4) + %0:gpr32 = COPY %3.sub_32 + ; CHECK: {{.*}}.sub_32:gpr64 = COPY {{.*}}.sub_32 + STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store 1 into @c, align 4) + %8:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32 + $x0 = COPY %8 + ; CHECK: $x0 = COPY + RET_ReallyLR implicit $x0 +... From bb39eb9e7f42ba8d1f86f961d7f887f9d626b733 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 8 Sep 2020 15:30:16 +0800 Subject: [PATCH 046/161] [PowerPC] Fix getMemOperandWithOffsetWidth Commit 3c0b3250 introduced memory cluster under pwr10 target, but a check for operands was unexpectedly removed. This adds it back to avoid regression. --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 9afc0308533ec4..8cb8c82e628334 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -4765,7 +4765,7 @@ MachineInstr *PPCInstrInfo::findLoopInstr( bool PPCInstrInfo::getMemOperandWithOffsetWidth( const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const { - if (!LdSt.mayLoadOrStore()) + if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3) return false; // Handle only loads/stores with base register followed by immediate offset. From 046f2402025c2ac93c1efc02acd60c5222e052f7 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 8 Sep 2020 14:33:47 +0700 Subject: [PATCH 047/161] [Test] More tests where IndVars fails to eliminate a range check --- .../IndVarSimplify/monotonic_checks.ll | 82 ++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll index 988b3923263f63..048254427c5fad 100644 --- a/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll +++ b/llvm/test/Transforms/IndVarSimplify/monotonic_checks.ll @@ -83,8 +83,8 @@ exit: ret i32 0 } -; Monotonic incrementing iv. we should be able to prove that %iv.next s len +; basing on its nsw and the fact that its starting value >s len. 
define i32 @test_02(i32* %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: @@ -164,6 +164,84 @@ exit: ret i32 0 } +define i32 @test_03(i32* %p) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2:!range !.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[RC:%.*]] = icmp ugt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, 1 + %rc = icmp sgt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 1000 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} + +define i32 @test_04(i32* %p) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG2]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[LEN]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], -1 +; CHECK-NEXT: [[RC:%.*]] = icmp slt i32 [[IV_NEXT]], [[LEN]] +; CHECK-NEXT: br i1 [[RC]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ne i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: fail: +; CHECK-NEXT: ret i32 -1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %len = load i32, i32* %p, !range !2 + br label %loop + +loop: + %iv = phi i32 [%len, %entry], [%iv.next, %backedge] + %iv.next = add i32 %iv, -1 + %rc = icmp slt i32 %iv.next, %len + br i1 %rc, label %backedge, label %fail + +backedge: + %loop.cond = icmp ne i32 %iv, 0 + br i1 %loop.cond, label %loop, label %exit + +fail: + ret i32 -1 + +exit: + ret i32 0 +} !0 = !{i32 0, i32 2147483647} !1 = !{i32 -2147483648, i32 0} +!2 = !{i32 0, i32 1000} From 69230e75f120141979248becac30ceaca4ab2e87 Mon Sep 17 00:00:00 2001 From: Richard Barton Date: Thu, 3 Sep 2020 11:44:03 +0100 Subject: [PATCH 048/161] [flang] Convert release notes to markdown Switch ReleaseNotes from .rst to .md to match the other docs. At the same time, fix the version number for master. --- flang/docs/ReleaseNotes.md | 87 +++++++++++++++++++++++++++++++++ flang/docs/ReleaseNotes.rst | 96 ------------------------------------- 2 files changed, 87 insertions(+), 96 deletions(-) create mode 100644 flang/docs/ReleaseNotes.md delete mode 100644 flang/docs/ReleaseNotes.rst diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md new file mode 100644 index 00000000000000..b4b00ee65ffb20 --- /dev/null +++ b/flang/docs/ReleaseNotes.md @@ -0,0 +1,87 @@ +# Flang 12.0.0 (In-Progress) Release Notes + +> **warning** +> +> These are in-progress notes for the upcoming LLVM 12.0.0 release. +> Release notes for previous releases can be found on [the Download +> Page](https://releases.llvm.org/download.html). 
+ +## Introduction + +This document contains the release notes for the Flang Fortran frontend, +part of the LLVM Compiler Infrastructure, release 12.0.0. Here we +describe the status of Flang in some detail, including major +improvements from the previous release and new feature work. For the +general LLVM release notes, see [the LLVM +documentation](https://llvm.org/docs/ReleaseNotes.html). All LLVM +releases may be downloaded from the [LLVM releases web +site](https://llvm.org/releases/). + +Note that if you are reading this file from a Git checkout, this +document applies to the *next* release, not the current one. To see the +release notes for a specific release, please see the [releases +page](https://llvm.org/releases/). + +## Known Issues + +These are issues that couldn't be fixed before the release. See the bug +reports for the latest status. + + * ... + +## Introducing Flang + +Flang is LLVM's Fortran front end and is new for the LLVM 11 release. + +Flang is still a work in progress for this release and is included for +experimentation and feedback. + +Flang is able to parse a comprehensive subset of the Fortran language +and check it for correctness. Flang is not yet able to generate LLVM IR +for the source code and thus is unable to compile a running binary. + +Flang is able to unparse the input source code into a canonical form and +emit it to allow testing. Flang can also invoke an external Fortran +compiler on this canonical input. + +Flang's parser has comprehensive support for: + * Fortran 2018 + * OpenMP 4.5 + * OpenACC 3.0 + +Interested users are invited to try to compile their Fortran codes with +flang in and report any issues in parsing or semantic checking in +[bugzilla](https://bugs.llvm.org/enter_bug.cgi?product=flang). + +### Major missing features + + * Flang is not supported on Windows platforms. + +## Using Flang + +Usage: `flang hello.f90 -o hello.bin` + +By default, Flang will parse the Fortran file `hello.f90` then unparse it to a +canonical Fortran source file. Flang will then invoke an external +Fortran compiler to compile this source file and link it, placing the +resulting executable in `hello.bin`. + +To specify the external Fortran compiler, set the `F18_FC` environment +variable to the name of the compiler binary and ensure that it is on your +`PATH`. The default value for `F18_FC` is `gfortran`. + +When invoked with no source input, Flang will wait for input on stdin. +When invoked in this way, Flang performs the same actions as if +called with `-fdebug-measure-parse-tree -funparse` and does not invoke +`F18_FC`. + +For a full list of options that Flang supports, run `flang --help`. + +## Additional Information + +Flang's documentation is located in the `flang/docs/` directory in the +LLVM monorepo. + +If you have any questions or comments about Flang, please feel free to +contact us via the [mailing +list](https://lists.llvm.org/mailman/listinfo/flang-dev). diff --git a/flang/docs/ReleaseNotes.rst b/flang/docs/ReleaseNotes.rst deleted file mode 100644 index bbc7377412d631..00000000000000 --- a/flang/docs/ReleaseNotes.rst +++ /dev/null @@ -1,96 +0,0 @@ -======================================== -Flang 11.0.0 (In-Progress) Release Notes -======================================== - -.. contents:: - :local: - :depth: 2 - -.. warning:: - - These are in-progress notes for the upcoming LLVM 11.0.0 release. - Release notes for previous releases can be found on - `the Download Page `_. 
- -Introduction -============ - -This document contains the release notes for the Flang Fortran -frontend, part of the LLVM Compiler Infrastructure, release 11.0.0. Here we -describe the status of Flang in some detail, including major -improvements from the previous release and new feature work. For the -general LLVM release notes, see `the LLVM -documentation `_. All LLVM -releases may be downloaded from the `LLVM releases web -site `_. - -Note that if you are reading this file from a Git checkout, this document -applies to the *next* release, not -the current one. To see the release notes for a specific release, please -see the `releases page `_. - -Known Issues -============ - -These are issues that couldn't be fixed before the release. See the bug reports for the latest status. - -- ... - -Introducing Flang -================= - -Flang is LLVM's Fortran front end and is new for the LLVM 11 release. - -Flang is still a work in progress for this release and is included for -experimentation and feedback. - -Flang status ------------- - -Flang is able to parse a comprehensive subset of the Fortran language -and check it for correctness. Flang is not yet able to generate LLVM IR for -the source code and thus is unable to compile a running binary. - -Flang is able to unparse the input source code into a canonical form and emit -it to allow testing. Flang can also invoke an external Fortran compiler on this -canonical input. - -Flang's parser has comprehensive support for: -- Fortran 2018 -- OpenMP 4.5 -- OpenACC 3.0 - -Major missing features ----------------------- - -- Flang is not supported on Windows platforms. - -Using Flang -=========== - -Usage: ``flang hello.f90 -o hello.bin`` - -Flang will parse the Fortran file ``hello.f90`` then unparse it to a canonical -Fortran source file. Flang will then invoke an external Fortran compiler to -compile this source file and link it, placing the resulting executable -in ``hello.bin``. - -To specify the external Fortran compiler, set the ``F18_FC`` environment -variable to the name of the compiler binary and ensure it is on your ``PATH``. -The default value for ``F18_FC`` is ``gfortran``. - -When invoked with no source input, Flang will wait for input on standard in. -When invoked in this way, Flang performs the same actions as if called with -``-fdebug-measure-parse-tree -funparse`` and does not invoke ``F18_FC``. - -For a full list of options that Flang supports, run ``flang --help``. - -Additional Information -====================== - -Flang's documentation is located in the ``flang/docs/`` directory in -the LLVM monorepo. - -If you have any questions or comments about Flang, please feel free to -contact us via the `mailing -list `_. From 3cda69872362526b1672ae23de4ac968b7564c2b Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 8 Sep 2020 16:08:42 +0800 Subject: [PATCH 049/161] [obj2yaml] Stop parsing the debug_str section when it encounters a string without the null terminator. When obj2yaml encounters a string without the null terminator, it should stop parsing the debug_str section. This patch addresses comments in [D86867](https://reviews.llvm.org/D86867#inline-803291). 
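Roughly why the previous approach could not detect this case (an illustrative snippet of the StringRef behaviour being moved away from, not the dumper's code):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"

void demo() {
  // "abc\0abc" is 7 bytes with no trailing NUL.
  llvm::StringRef Table("abc\0abc", 7);
  llvm::SmallVector<llvm::StringRef, 4> Parts;
  // Yields {"abc", "abc"}: the missing terminator is silently forgotten, so
  // a YAML round trip would re-add one and change the section contents.
  Table.split(Parts, '\0');
}
```

DataExtractor::getCStr() with an Error out-parameter, used below, fails on the unterminated string instead, and the caller can then fall back to emitting raw section content.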
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87261 --- .../ObjectYAML/MachO/DWARF-debug_str.yaml | 58 +++++++++++++++++++ .../tools/obj2yaml/ELF/DWARF/debug-str.yaml | 24 ++++++++ llvm/tools/obj2yaml/dwarf2yaml.cpp | 20 ++++--- llvm/tools/obj2yaml/elf2yaml.cpp | 2 +- llvm/tools/obj2yaml/macho2yaml.cpp | 6 +- llvm/tools/obj2yaml/obj2yaml.h | 3 +- 6 files changed, 100 insertions(+), 13 deletions(-) diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml index 29247b334a1a91..9bb55ea3509118 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_str.yaml @@ -321,3 +321,61 @@ DWARF: # EMPTY-STRING-NEXT: debug_str: # EMPTY-STRING-NEXT: - '' # EMPTY-STRING-NEXT: ... + +## d) Test generating and dumping a __debug_str section which contains a string without a null terminator. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - sectname: __debug_str +# NO-TERMINATOR-NEXT: segname: __DWARF +# NO-TERMINATOR-NEXT: addr: 0x0000000000000000 +# NO-TERMINATOR-NEXT: size: 7 +# NO-TERMINATOR-NEXT: offset: 0x00000210 +# NO-TERMINATOR-NEXT: align: 0 +# NO-TERMINATOR-NEXT: reloff: 0x00000000 +# NO-TERMINATOR-NEXT: nreloc: 0 +# NO-TERMINATOR-NEXT: flags: 0x00000000 +# NO-TERMINATOR-NEXT: reserved1: 0x00000000 +# NO-TERMINATOR-NEXT: reserved2: 0x00000000 +# NO-TERMINATOR-NEXT: reserved3: 0x00000000 +# NO-TERMINATOR-NEXT: content: '61626300616263' +# NO-TERMINATOR-NEXT: ... + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_str + segname: __DWARF + addr: 0x00 + size: 7 + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: '61626300616263' ## "abc\0abc" diff --git a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml index e058642877243b..76c1c5c1b36505 100644 --- a/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml +++ b/llvm/test/tools/obj2yaml/ELF/DWARF/debug-str.yaml @@ -99,3 +99,27 @@ FileHeader: Type: ET_EXEC DWARF: debug_str: [] + +## d) Test that yaml2obj stops parsing the .debug_str section if it encounters a +## string without a null terminator. The output uses a raw content section instead of +## the DWARF tag to represent the broken .debug_str section. + +# RUN: yaml2obj --docnum=3 %s | obj2yaml | FileCheck %s --check-prefix=NO-TERMINATOR + +# NO-TERMINATOR-NOT: DWARF: +# NO-TERMINATOR: Sections: +# NO-TERMINATOR-NEXT: - Name: .debug_str +# NO-TERMINATOR-NEXT: Type: SHT_PROGBITS +# NO-TERMINATOR-NEXT: Flags: [ SHF_MERGE, SHF_STRINGS ] +# NO-TERMINATOR-NEXT: Content: '61626300616263' +# NO-TERMINATOR-NEXT: ... 
+ +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +Sections: + - Name: .debug_str + Type: SHT_PROGBITS + Content: "61626300616263" ## "abc\0abc" diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp index 513fa0fdef01df..cef7b699805c86 100644 --- a/llvm/tools/obj2yaml/dwarf2yaml.cpp +++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp @@ -46,14 +46,20 @@ void dumpDebugAbbrev(DWARFContext &DCtx, DWARFYAML::Data &Y) { } } -void dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) { - StringRef RemainingTable = DCtx.getDWARFObj().getStrSection(); - Y.DebugStrings.emplace(); - while (RemainingTable.size() > 0) { - auto SymbolPair = RemainingTable.split('\0'); - RemainingTable = SymbolPair.second; - Y.DebugStrings->push_back(SymbolPair.first); +Error dumpDebugStrings(DWARFContext &DCtx, DWARFYAML::Data &Y) { + DataExtractor StrData = DCtx.getStringExtractor(); + uint64_t Offset = 0; + std::vector DebugStr; + Error Err = Error::success(); + while (StrData.isValidOffset(Offset)) { + const char *CStr = StrData.getCStr(&Offset, &Err); + if (Err) + return Err; + DebugStr.push_back(CStr); } + + Y.DebugStrings = DebugStr; + return Err; } Error dumpDebugARanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp index 9f524479bb04c3..264bc4d1dbf361 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -415,7 +415,7 @@ Optional ELFDumper::dumpDWARFSections( if (RawSec->Name == ".debug_aranges") Err = dumpDebugARanges(*DWARFCtx.get(), DWARF); else if (RawSec->Name == ".debug_str") - dumpDebugStrings(*DWARFCtx.get(), DWARF); + Err = dumpDebugStrings(*DWARFCtx.get(), DWARF); // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. 
diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index 3a93d5c6846b59..49347431b9a4f4 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -154,10 +154,8 @@ static Error dumpDebugSection(StringRef SecName, DWARFContext &DCtx, } if (SecName == "__debug_ranges") return dumpDebugRanges(DCtx, DWARF); - if (SecName == "__debug_str") { - dumpDebugStrings(DCtx, DWARF); - return Error::success(); - } + if (SecName == "__debug_str") + return dumpDebugStrings(DCtx, DWARF); return createStringError(errc::not_supported, "dumping " + SecName + " section is not supported"); } diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h index 85a7ac9a4787b1..66a2d2753622cd 100644 --- a/llvm/tools/obj2yaml/obj2yaml.h +++ b/llvm/tools/obj2yaml/obj2yaml.h @@ -47,6 +47,7 @@ void dumpDebugPubSections(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugInfo(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); void dumpDebugLines(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); llvm::Error dumpDebugRanges(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); -void dumpDebugStrings(llvm::DWARFContext &DCtx, llvm::DWARFYAML::Data &Y); +llvm::Error dumpDebugStrings(llvm::DWARFContext &DCtx, + llvm::DWARFYAML::Data &Y); #endif From 9be6178449555576645ac922e342936319445cac Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 8 Sep 2020 03:39:23 -0400 Subject: [PATCH 050/161] [mlir][Vector] Make VectorToSCF deterministic Differential Revision: https://reviews.llvm.org/D87273 --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 11 +++++------ mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 8f7d43829846b8..08d0117e6a17c9 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -584,9 +584,9 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( steps.push_back(std_constant_index(step)); // 2. Emit alloc-copy-load-dealloc. + MLIRContext *ctx = op->getContext(); Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); - Value vec = vector_type_cast(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { auto ivs = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. @@ -595,13 +595,12 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); ArrayRef indicesRef(indices), ivsRef(ivs); - Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value vector = vector_insert_element(remote(indicesRef), - local(ivsRef.drop_back()), pos); + Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back()); + Value scal = remote(indicesRef); + Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos); local(ivsRef.drop_back()) = vector; }); - Value vectorValue = std_load(vec); + Value vectorValue = std_load(vector_type_cast(tmp)); // 3. Propagate. 
rewriter.replaceOp(op, vectorValue); diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 240925baf3d8cd..5e8aea1f511352 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -99,8 +99,8 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: %[[L3:.*]] = select // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] // - // CHECK-DAG: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-DAG: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref + // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> // CHECK-NEXT: } From 2168dbf4cc766dfb552076d9b1e84b00122b7993 Mon Sep 17 00:00:00 2001 From: Shivanshu Goyal Date: Tue, 8 Sep 2020 10:17:05 +0200 Subject: [PATCH 051/161] getClangStripDependencyFileAdjuster(): Do not remove -M args when using MSVC cl driver MSVC's cl.exe has a few command line arguments which start with -M such as "-MD", "-MDd", "-MT", "-MTd", "-MP". These arguments are not dependency file generation related, and these arguments were being removed by getClangStripDependencyFileAdjuster() which was wrong. Differential revision: https://reviews.llvm.org/D86999 --- clang/lib/Tooling/ArgumentsAdjusters.cpp | 34 ++++++++++++++++++------ clang/unittests/Tooling/ToolingTest.cpp | 34 ++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/clang/lib/Tooling/ArgumentsAdjusters.cpp b/clang/lib/Tooling/ArgumentsAdjusters.cpp index a857b57fbf7bca..bcfb5b39a07706 100644 --- a/clang/lib/Tooling/ArgumentsAdjusters.cpp +++ b/clang/lib/Tooling/ArgumentsAdjusters.cpp @@ -21,6 +21,16 @@ namespace clang { namespace tooling { +static StringRef getDriverMode(const CommandLineArguments &Args) { + for (const auto &Arg : Args) { + StringRef ArgRef = Arg; + if (ArgRef.consume_front("--driver-mode=")) { + return ArgRef; + } + } + return StringRef(); +} + /// Add -fsyntax-only option and drop options that triggers output generation. ArgumentsAdjuster getClangSyntaxOnlyAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { @@ -93,20 +103,28 @@ ArgumentsAdjuster getClangStripSerializeDiagnosticAdjuster() { ArgumentsAdjuster getClangStripDependencyFileAdjuster() { return [](const CommandLineArguments &Args, StringRef /*unused*/) { + auto UsingClDriver = (getDriverMode(Args) == "cl"); + CommandLineArguments AdjustedArgs; for (size_t i = 0, e = Args.size(); i < e; ++i) { StringRef Arg = Args[i]; - // All dependency-file options begin with -M. These include -MM, - // -MF, -MG, -MP, -MT, -MQ, -MD, and -MMD. - if (!Arg.startswith("-M") && !Arg.startswith("/showIncludes") && - !Arg.startswith("-showIncludes")) { - AdjustedArgs.push_back(Args[i]); + + // These flags take an argument: -MX foo. Skip the next argument also. + if (!UsingClDriver && (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ")) { + ++i; continue; } + // When not using the cl driver mode, dependency file generation options + // begin with -M. These include -MM, -MF, -MG, -MP, -MT, -MQ, -MD, and + // -MMD. 
+ if (!UsingClDriver && Arg.startswith("-M")) + continue; + // Under MSVC's cl driver mode, dependency file generation is controlled + // using /showIncludes + if (Arg.startswith("/showIncludes") || Arg.startswith("-showIncludes")) + continue; - if (Arg == "-MF" || Arg == "-MT" || Arg == "-MQ") - // These flags take an argument: -MX foo. Skip the next argument also. - ++i; + AdjustedArgs.push_back(Args[i]); } return AdjustedArgs; }; diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp index cc6f453284d719..691a847d5a7158 100644 --- a/clang/unittests/Tooling/ToolingTest.cpp +++ b/clang/unittests/Tooling/ToolingTest.cpp @@ -563,6 +563,40 @@ TEST(ClangToolTest, StripDependencyFileAdjusterShowIncludes) { EXPECT_TRUE(HasFlag("-c")); } +// Check getClangStripDependencyFileAdjuster doesn't strip args when using the +// MSVC cl.exe driver +TEST(ClangToolTest, StripDependencyFileAdjusterMsvc) { + FixedCompilationDatabase Compilations( + "/", {"--driver-mode=cl", "-MD", "-MDd", "-MT", "-O1", "-MTd", "-MP"}); + + ClangTool Tool(Compilations, std::vector(1, "/a.cc")); + Tool.mapVirtualFile("/a.cc", "void a() {}"); + + std::unique_ptr Action( + newFrontendActionFactory()); + + CommandLineArguments FinalArgs; + ArgumentsAdjuster CheckFlagsAdjuster = + [&FinalArgs](const CommandLineArguments &Args, StringRef /*unused*/) { + FinalArgs = Args; + return Args; + }; + Tool.clearArgumentsAdjusters(); + Tool.appendArgumentsAdjuster(getClangStripDependencyFileAdjuster()); + Tool.appendArgumentsAdjuster(CheckFlagsAdjuster); + Tool.run(Action.get()); + + auto HasFlag = [&FinalArgs](const std::string &Flag) { + return llvm::find(FinalArgs, Flag) != FinalArgs.end(); + }; + EXPECT_TRUE(HasFlag("-MD")); + EXPECT_TRUE(HasFlag("-MDd")); + EXPECT_TRUE(HasFlag("-MT")); + EXPECT_TRUE(HasFlag("-O1")); + EXPECT_TRUE(HasFlag("-MTd")); + EXPECT_TRUE(HasFlag("-MP")); +} + // Check getClangStripPluginsAdjuster strips plugin related args. 
TEST(ClangToolTest, StripPluginsAdjuster) { FixedCompilationDatabase Compilations( From 38778e1087b2825e91b07ce4570c70815b49dcdc Mon Sep 17 00:00:00 2001 From: Serge Guelton Date: Thu, 25 Jun 2020 05:57:01 -0400 Subject: [PATCH 052/161] Provide anchor for compiler extensions This patch is cherry-picked from 04b0a4e22e3b4549f9d241f8a9f37eebecb62a31, and amended to prevent an undefined reference to `llvm::EnableABIBreakingChecks' --- llvm/lib/Extensions/Extensions.cpp | 15 +++++++++++++++ llvm/lib/Extensions/LLVMBuild.txt | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index e69de29bb2d1d6..2fe537f91876ad 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -0,0 +1,15 @@ +#include "llvm/Passes/PassPlugin.h" +#define HANDLE_EXTENSION(Ext) \ + llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + + +namespace llvm { + namespace details { + void extensions_anchor() { +#define HANDLE_EXTENSION(Ext) \ + static auto Ext = get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" + } + } +} diff --git a/llvm/lib/Extensions/LLVMBuild.txt b/llvm/lib/Extensions/LLVMBuild.txt index 2005830a4dd7ad..7a98c8f680513a 100644 --- a/llvm/lib/Extensions/LLVMBuild.txt +++ b/llvm/lib/Extensions/LLVMBuild.txt @@ -18,4 +18,4 @@ type = Library name = Extensions parent = Libraries -required_libraries = +required_libraries = Support From 67b37f571cc27d5684125f694d719b114ad72a18 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 08:31:52 +0000 Subject: [PATCH 053/161] [mlir] Conv ops vectorization pass In this commit a new way of convolution ops lowering is introduced. The conv op vectorization pass lowers linalg convolution ops into vector contractions. This lowering is possible when conv op is first tiled by 1 along specific dimensions which transforms it into dot product between input and kernel subview memory buffers. This pass converts such conv op into vector contraction and does all necessary vector transfers that make it work. Differential Revision: https://reviews.llvm.org/D86619 --- .../Dialect/Linalg/Transforms/Transforms.h | 51 ++++++ .../Linalg/Transforms/Vectorization.cpp | 95 ++++++++++ .../LinalgToVector/linalg-to-vector.mlir | 167 ++++++++++++++++++ mlir/test/lib/Transforms/CMakeLists.txt | 1 + .../lib/Transforms/TestConvVectorization.cpp | 51 ++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 6 files changed, 367 insertions(+) create mode 100644 mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir create mode 100644 mlir/test/lib/Transforms/TestConvVectorization.cpp diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index f438b6587c8bce..ce3b5fd2fd2479 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -30,6 +30,10 @@ struct TiledLinalgOp { SmallVector loops; }; +/// Populates patterns for vectorization of all ConvN-D ops. +void populateConvVectorizationPatterns(MLIRContext *context, + OwningRewritePatternList &patterns); + /// Performs standalone tiling of a single LinalgOp by `tileSizes`. 
/// and permute the loop nest according to `interchangeVector` /// The permutation is expressed as a list of integers that specify @@ -531,6 +535,53 @@ struct AffineMinSCFCanonicalizationPattern PatternRewriter &rewriter) const override; }; +/// Converts Convolution op into vector contraction. +/// +/// Conversion expects ConvOp to have dimensions marked in the *mask* as +/// false of size 1. This ensures that the ConvOp can be lowered to vector +/// contraction of dimensions marked in the *mask* as true. +/// +/// A good example is ConvNHWCOp which is 2D Conv op with channels as the last +/// dimension. For this op we contract last 3 dimensions. +/// The initial op definition looks like this: +/// ``` +/// linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : +/// (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) +/// ``` +/// This op can be expressed as a dot product between %arg0 (input) and +/// %arg1 (kernel) which is written into first entry of %arg2 (output). This is +/// the ConvOp this pass expects and converts into: +/// ``` +/// #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +/// #map1 = affine_map<(d0, d1, d2) -> ()> +/// ..... +/// %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %c0_f32 +/// : memref<1x3x3x3xf32>, vector<3x3x3xf32> +/// %1 = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %c0_f32 +/// : memref<1x3x3x3xf32>, vector<3x3x3xf32> +/// %2 = vector.contract {indexing_maps = [#map0, #map0, #map1], +/// iterator_types = ["reduction", "reduction", "reduction"]} %0, %1, +/// %c0_f32 : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +/// store %2, %arg2[%c0, %c0, %c0, %c0] : memref +/// ``` +/// where first 2 operations read input and kernel memory buffers into vectors. +/// Subsequently, they are contracted together and the result is written to +/// the first entry of the output buffer. +template +struct ConvOpVectorization : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + SmallVector mask; + + ConvOpVectorization(MLIRContext *context, SmallVector msk) + : OpRewritePattern(context) { + assert(msk.size() == N && "Mask size does not match rank"); + this->mask = msk; + } + + LogicalResult matchAndRewrite(ConvOp minOp, + PatternRewriter &rewriter) const override; +}; + //===----------------------------------------------------------------------===// // Support for staged pattern application. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index ada89f1c82b5c8..cd36c753b6f691 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -367,3 +367,98 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite( return success(); } + +template +LogicalResult ConvOpVectorization::matchAndRewrite( + ConvOp op, PatternRewriter &rewriter) const { + const uint dimSize = 3; + Location loc = op.getLoc(); + MLIRContext *context = op.getContext(); + edsc::ScopedContext scope(rewriter, loc); + + ShapedType inShapeType = op.getInputShapedType(0); + ShapedType kShapeType = op.getInputShapedType(1); + + ArrayRef inShape = inShapeType.getShape(); + ArrayRef kShape = kShapeType.getShape(); + + if (!inShapeType.hasStaticShape() || !kShapeType.hasStaticShape()) + return failure(); + + SmallVector mapping; + // Fail to apply when the size of not vectorized dimension is not 1 or + // when the size of vectorized dimension is not dimSize. 
+ for (unsigned i = 0; i < N; i++) { + if (!mask[i] && (inShape[i] != 1 || kShape[i] != 1)) + return failure(); + if (mask[i] && (inShape[i] != dimSize || kShape[i] != dimSize)) + return failure(); + + if (mask[i]) + mapping.push_back(getAffineDimExpr(i, context)); + } + + Value input = op.getInput(0); + Value kernel = op.getInput(1); + Value output = op.getOutputBuffer(0); + + uint rank = inShapeType.getRank(); + uint numDims = mapping.size(); + Type elemType = inShapeType.getElementType(); + + auto map = AffineMap::get(rank, 0, mapping, context); + SmallVector zeros(rank, std_constant_index(0)); + auto vecType = + VectorType::get(SmallVector(numDims, dimSize), elemType); + + auto inputVec = vector_transfer_read(vecType, input, zeros, map); + auto kernelVec = vector_transfer_read(vecType, kernel, zeros, map); + + auto acc = std_constant(elemType, rewriter.getZeroAttr(elemType)); + + std::array indexingMaps{ + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::getMultiDimIdentityMap(numDims, context), + AffineMap::get(numDims, 0, {}, context)}; + + std::vector iteratorTypes(numDims, "reduction"); + + auto result = rewriter.create( + loc, inputVec, kernelVec, acc, + rewriter.getAffineMapArrayAttr(indexingMaps), + rewriter.getStrArrayAttr(iteratorTypes)); + + rewriter.create(loc, result, output, ValueRange(zeros)); + rewriter.eraseOp(op); + return success(); +} + +void mlir::linalg::populateConvVectorizationPatterns( + MLIRContext *context, OwningRewritePatternList &patterns) { + patterns.insert>( + context, SmallVector{true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true}); + + patterns.insert>( + context, SmallVector{true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true}); + + patterns.insert>( + context, SmallVector{true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); + + patterns.insert>( + context, SmallVector{false, true, true, true, true}); +} diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir new file mode 100644 index 00000000000000..487718301d0058 --- /dev/null +++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir @@ -0,0 +1,167 @@ +// RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s + +// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0) -> (d0)> +// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0) -> ()> +// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)> +// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1) -> ()> +// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)> +// CHECK-DAG: #[[$map6:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[$map7:.*]] = affine_map<(d0, d1, d2) -> ()> +// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d2, d3, d4)> +// CHECK-DAG: #[[$map9:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK-DAG: #[[$map10:.*]] = affine_map<(d0, d1, d2, d3) -> ()> + +func @conv_1d(%arg0: memref<3xf32>, %arg1: memref<3xf32>, %arg2: memref) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref<3xf32>, memref<3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3xf32> +// CHECK-SAME: 
%[[arg2:[a-zA-Z0-9]+]]: memref, vector<3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map0]], #[[$map1]]], iterator_types = ["reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3xf32>, vector<3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]]] : memref +// CHECK: return + +func @conv_1d_ncw(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_ncw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + + +func @conv_1d_nwc(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref) { + linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_1d_nwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>, %arg2: memref) { + linalg.conv_2d %arg0, %arg1, %arg2 : (memref<3x3xf32>, memref<3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nchw(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d_nchw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = 
["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_2d_nhwc(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref) { + linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_2d_nhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d(%arg0: memref<3x3x3xf32>, %arg1: memref<3x3x3xf32>, %arg2: memref) { + linalg.conv_3d %arg0, %arg1, %arg2 : (memref<3x3x3xf32>, memref<3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ncdhw(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ncdhw +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return + +func @conv_3d_ndhwc(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref) { + linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref) + return +} + +// CHECK-LABEL: @conv_3d_ndhwc +// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32> +// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref, vector<3x3x3x3xf32> +// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32> +// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = 
[#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32 +// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref +// CHECK: return diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index de894467d63d43..3ac1e7c5523508 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -5,6 +5,7 @@ add_mlir_library(MLIRTestTransforms TestExpandTanh.cpp TestCallGraph.cpp TestConstantFold.cpp + TestConvVectorization.cpp TestConvertCallOp.cpp TestConvertGPUKernelToCubin.cpp TestConvertGPUKernelToHsaco.cpp diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp new file mode 100644 index 00000000000000..37e509cbbbe1ba --- /dev/null +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -0,0 +1,51 @@ +//===- TestConvVectorization.cpp - Linalg to Vector dialect conversion ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +namespace { +/// A pass converting MLIR Linalg ops into Vector ops. +class TestConvVectorization + : public PassWrapper> { + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + registry.insert(); + registry.insert(); + } +}; +} // namespace + +void TestConvVectorization::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + OwningRewritePatternList patterns; + linalg::populateConvVectorizationPatterns(context, patterns); + + if (failed(applyPartialConversion(module, target, patterns))) + return signalPassFailure(); +} + +namespace mlir { +void registerTestConvVectorization() { + PassRegistration testTransformPatternsPass( + "test-conv-vectorization", "Test vectorization of convolutions"); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 34e03a5f992017..437b5f4b6f1a6f 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -45,6 +45,7 @@ void registerTestAllReduceLoweringPass(); void registerTestBufferPlacementPreparationPass(); void registerTestCallGraphPass(); void registerTestConstantFold(); +void registerTestConvVectorization(); void registerTestConvertGPUKernelToCubinPass(); void registerTestConvertGPUKernelToHsacoPass(); void registerTestDominancePass(); @@ -93,6 +94,7 @@ void registerTestPasses() { registerTestAffineLoopUnswitchingPass(); registerTestLoopPermutationPass(); registerTestCallGraphPass(); + registerTestConvVectorization(); registerTestConstantFold(); #if MLIR_CUDA_CONVERSIONS_ENABLED registerTestConvertGPUKernelToCubinPass(); From 239eff502bca64f544f311e7d7a65fdec01cb9c4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 7 Sep 2020 17:39:16 +0200 Subject: [PATCH 054/161] [mlir][VectorOps] Redo the scalar loop emission in 
VectoToSCF to pad instead of clipping This replaces the select chain for edge-padding with an scf.if that performs the memory operation when the index is in bounds and uses the pad value when it's not. For transfer_write the same mechanism is used, skipping the store when the index is out of bounds. The integration test has a bunch of cases of how I believe this should work. Differential Revision: https://reviews.llvm.org/D87241 --- .../Vector/CPU/test-transfer-to-loops.mlir | 24 +++ .../VectorToLLVM/ConvertVectorToLLVM.cpp | 2 +- .../Conversion/VectorToSCF/VectorToSCF.cpp | 186 +++++++++--------- .../VectorToSCF/vector-to-loops.mlir | 97 +++------ 4 files changed, 151 insertions(+), 158 deletions(-) diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir index 8d965779dfc6df..38cbabc329989a 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-to-loops.mlir @@ -4,6 +4,7 @@ // RUN: FileCheck %s #map0 = affine_map<(d0, d1) -> (d1, d0)> +#map1 = affine_map<(d0, d1) -> (d1)> func @print_memref_f32(memref<*xf32>) @@ -29,6 +30,7 @@ func @main() { %c0 = constant 0 : index %c1 = constant 1 : index %c2 = constant 2 : index + %c3 = constant 3 : index %c6 = constant 6 : index %cst = constant -4.2e+01 : f32 %0 = call @alloc_2d_filled_f32(%c6, %c6) : (index, index) -> memref @@ -76,6 +78,28 @@ func @main() { // CHECK-SAME: ( 205, 305, 405, 505, 504 ), // CHECK-SAME: ( 105, 205, 305, 405, 505 ) ) + %3 = vector.transfer_read %0[%c2, %c3], %cst : memref, vector<5x5xf32> + vector.print %3 : vector<5x5xf32> + // New 5x5 block rooted @{2, 3} in memory. + // CHECK-NEXT: ( ( 403, 503, 502, -42, -42 ), + // CHECK-SAME: ( 404, 504, 503, -42, -42 ), + // CHECK-SAME: ( 405, 505, 504, -42, -42 ), + // CHECK-SAME: ( 305, 405, 505, -42, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ) ) + + %4 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map0} : memref, vector<5x5xf32> + vector.print %4 : vector<5x5xf32> + // Transposed 5x5 block rooted @{2, 3} in memory. 
+ // CHECK-NEXT: ( ( 403, 404, 405, 305, -42 ), + // CHECK-SAME: ( 503, 504, 505, 405, -42 ), + // CHECK-SAME: ( 502, 503, 504, 505, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ), + // CHECK-SAME: ( -42, -42, -42, -42, -42 ) ) + + %5 = vector.transfer_read %0[%c2, %c3], %cst {permutation_map = #map1} : memref, vector<5xf32> + vector.print %5 : vector<5xf32> + // CHECK-NEXT: ( 403, 503, 502, -42, -42 ) + dealloc %0 : memref return } diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index a43bec855ff0ad..d51a96dca3849d 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -1096,7 +1096,7 @@ static bool isContiguous(MemRefType memRefType, SmallVectorImpl &strides) { int64_t offset; auto successStrides = getStridesAndOffset(memRefType, strides, offset); - bool isContiguous = (strides.back() == 1); + bool isContiguous = strides.empty() || strides.back() == 1; if (isContiguous) { auto sizes = memRefType.getShape(); for (int index = 0, e = strides.size() - 2; index < e; ++index) { diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 08d0117e6a17c9..801ead825ffc94 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -111,15 +111,6 @@ class NDTransferOpHelper { template void emitLoops(Lambda loopBodyBuilder); - /// Operate within the body of `emitLoops` to: - /// 1. Compute the indexings `majorIvs + majorOffsets` and save them in - /// `majorIvsPlusOffsets`. - /// 2. Return a boolean that determines whether the first `majorIvs.rank()` - /// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. - Value emitInBoundsCondition(ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets); - /// Common state to lower vector transfer ops. PatternRewriter &rewriter; const VectorTransferToSCFOptions &options; @@ -196,11 +187,16 @@ static Value onTheFlyFoldSLT(Value v, Value ub) { return slt(v, ub); } -template -Value NDTransferOpHelper::emitInBoundsCondition( - ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, - SmallVectorImpl &majorIvsPlusOffsets) { +/// 1. Compute the indexings `majorIvs + majorOffsets` and save them in +/// `majorIvsPlusOffsets`. +/// 2. Return a value of i1 that determines whether the first `majorIvs.rank()` +/// dimensions `majorIvs + majorOffsets` are all within `memrefBounds`. +static Value +emitInBoundsCondition(PatternRewriter &rewriter, + VectorTransferOpInterface xferOp, unsigned leadingRank, + ValueRange majorIvs, ValueRange majorOffsets, + MemRefBoundsCapture &memrefBounds, + SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); unsigned idx = 0; @@ -271,7 +267,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2. If the condition is not null, we need an IfOp, which may yield @@ -374,7 +371,8 @@ LogicalResult NDTransferOpHelper::doReplace() { // context. 
SmallVector majorIvsPlusOffsets; Value inBoundsCondition = emitInBoundsCondition( - majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); + rewriter, cast(xferOp.getOperation()), + leadingRank, majorIvs, majorOffsets, memrefBounds, majorIvsPlusOffsets); if (inBoundsCondition) { // 2.a. If the condition is not null, we need an IfOp, to write @@ -424,60 +422,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -/// Emits remote memory accesses that are clipped to the boundaries of the -/// MemRef. -template -static SmallVector -clip(TransferOpTy transfer, MemRefBoundsCapture &bounds, ArrayRef ivs) { - using namespace mlir::edsc; - - Value zero(std_constant_index(0)), one(std_constant_index(1)); - SmallVector memRefAccess(transfer.indices()); - SmallVector clippedScalarAccessExprs(memRefAccess.size()); - // Indices accessing to remote memory are clipped and their expressions are - // returned in clippedScalarAccessExprs. - for (unsigned memRefDim = 0; memRefDim < clippedScalarAccessExprs.size(); - ++memRefDim) { - // Linear search on a small number of entries. - int loopIndex = -1; - auto exprs = transfer.permutation_map().getResults(); - for (auto en : llvm::enumerate(exprs)) { - auto expr = en.value(); - auto dim = expr.template dyn_cast(); - // Sanity check. - assert( - (dim || expr.template cast().getValue() == 0) && - "Expected dim or 0 in permutationMap"); - if (dim && memRefDim == dim.getPosition()) { - loopIndex = en.index(); - break; - } - } - - // We cannot distinguish atm between unrolled dimensions that implement - // the "always full" tile abstraction and need clipping from the other - // ones. So we conservatively clip everything. - using namespace edsc::op; - auto N = bounds.ub(memRefDim); - auto i = memRefAccess[memRefDim]; - if (loopIndex < 0) { - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i, N), i, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i, zero), zero, select_1); - } else { - auto ii = ivs[loopIndex]; - auto i_plus_ii = i + ii; - auto N_minus_1 = N - one; - auto select_1 = std_select(slt(i_plus_ii, N), i_plus_ii, N_minus_1); - clippedScalarAccessExprs[memRefDim] = - std_select(slt(i_plus_ii, zero), zero, select_1); - } - } - - return clippedScalarAccessExprs; -} - namespace mlir { template @@ -497,6 +441,60 @@ MemRefType VectorTransferRewriter::tmpMemRefType( {}, 0); } +static void emitWithBoundsChecks( + PatternRewriter &rewriter, VectorTransferOpInterface transfer, + ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture, + function_ref)> inBoundsFun, + function_ref)> outOfBoundsFun = nullptr) { + // Permute the incoming indices according to the permutation map. + SmallVector indices = + linalg::applyMapToValues(rewriter, transfer.getLoc(), + transfer.permutation_map(), transfer.indices()); + + // Generate a bounds check if necessary. + SmallVector majorIvsPlusOffsets; + Value inBoundsCondition = + emitInBoundsCondition(rewriter, transfer, 0, ivs, indices, + memRefBoundsCapture, majorIvsPlusOffsets); + + // Apply the permutation map to the ivs. The permutation map may not use all + // the inputs. + SmallVector scalarAccessExprs(transfer.indices().size()); + for (unsigned memRefDim = 0; memRefDim < transfer.indices().size(); + ++memRefDim) { + // Linear search on a small number of entries. + int loopIndex = -1; + auto exprs = transfer.permutation_map().getResults(); + for (auto en : llvm::enumerate(exprs)) { + auto expr = en.value(); + auto dim = expr.dyn_cast(); + // Sanity check. 
+ assert((dim || expr.cast().getValue() == 0) && + "Expected dim or 0 in permutationMap"); + if (dim && memRefDim == dim.getPosition()) { + loopIndex = en.index(); + break; + } + } + + using namespace edsc::op; + auto i = transfer.indices()[memRefDim]; + scalarAccessExprs[memRefDim] = loopIndex < 0 ? i : i + ivs[loopIndex]; + } + + if (inBoundsCondition) + conditionBuilder( + /* scf.if */ inBoundsCondition, // { + [&] { inBoundsFun(scalarAccessExprs); }, + // } else { + outOfBoundsFun ? [&] { outOfBoundsFun(scalarAccessExprs); } + : function_ref() + // } + ); + else + inBoundsFun(scalarAccessExprs); +} + /// Lowers TransferReadOp into a combination of: /// 1. local memory allocation; /// 2. perfect loop nest over: @@ -588,17 +586,25 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( Value tmp = setAllocAtFunctionEntry(tmpMemRefType(transfer), transfer); StdIndexedValue local(tmp); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); + auto ivsStorage = llvm::to_vector<8>(loopIvs); // Swap the ivs which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); - Value pos = std_index_cast(IntegerType::get(32, ctx), ivsRef.back()); - Value scal = remote(indicesRef); - Value vector = vector_insert_element(scal, local(ivsRef.drop_back()), pos); - local(ivsRef.drop_back()) = vector; + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); + Value pos = std_index_cast(IntegerType::get(32, ctx), ivs.back()); + Value inVector = local(ivs.drop_back()); + auto loadValue = [&](ArrayRef indices) { + Value vector = vector_insert_element(remote(indices), inVector, pos); + local(ivs.drop_back()) = vector; + }; + auto loadPadding = [&](ArrayRef) { + Value vector = vector_insert_element(transfer.padding(), inVector, pos); + local(ivs.drop_back()) = vector; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, loadValue, loadPadding); }); Value vectorValue = std_load(vector_type_cast(tmp)); @@ -674,17 +680,21 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( Value vec = vector_type_cast(tmp); std_store(vectorValue, vec); loopNestBuilder(lbs, ubs, steps, [&](ValueRange loopIvs) { - auto ivs = llvm::to_vector<8>(loopIvs); - // Swap the ivs which will reorder memory accesses. + auto ivsStorage = llvm::to_vector<8>(loopIvs); + // Swap the ivsStorage which will reorder memory accesses. if (coalescedIdx >= 0) - std::swap(ivs.back(), ivs[coalescedIdx]); - // Computes clippedScalarAccessExprs in the loop nest scope (ivs exist). - SmallVector indices = clip(transfer, memRefBoundsCapture, ivs); - ArrayRef indicesRef(indices), ivsRef(ivs); + std::swap(ivsStorage.back(), ivsStorage[coalescedIdx]); + + ArrayRef ivs(ivsStorage); Value pos = - std_index_cast(IntegerType::get(32, op->getContext()), ivsRef.back()); - Value scalar = vector_extract_element(local(ivsRef.drop_back()), pos); - remote(indices) = scalar; + std_index_cast(IntegerType::get(32, op->getContext()), ivs.back()); + auto storeValue = [&](ArrayRef indices) { + Value scalar = vector_extract_element(local(ivs.drop_back()), pos); + remote(indices) = scalar; + }; + emitWithBoundsChecks( + rewriter, cast(transfer.getOperation()), ivs, + memRefBoundsCapture, storeValue); }); // 3. Erase. 
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir index 5e8aea1f511352..ef1b2e995053c2 100644 --- a/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir +++ b/mlir/test/Conversion/VectorToSCF/vector-to-loops.mlir @@ -15,11 +15,13 @@ func @materialize_read_1d() { %ip3 = affine.apply affine_map<(d0) -> (d0 + 3)> (%i1) %f4 = vector.transfer_read %A[%i0, %ip3], %f0 {permutation_map = affine_map<(d0, d1) -> (d0)>} : memref<7x42xf32>, vector<4xf32> // Both accesses in the load must be clipped otherwise %i1 + 2 and %i1 + 3 will go out of bounds. - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED1:.*]] = select - // CHECK: {{.*}} = select - // CHECK: %[[FILTERED2:.*]] = select - // CHECK: %{{.*}} = load {{.*}}[%[[FILTERED1]], %[[FILTERED2]]] : memref<7x42xf32> + // CHECK: scf.if + // CHECK-NEXT: load + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store + // CHECK-NEXT: else + // CHECK-NEXT: vector.insertelement + // CHECK-NEXT: store } } return @@ -53,7 +55,6 @@ func @materialize_read_1d_partially_specialized(%dyn1 : index, %dyn2 : index, %d // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL: func @materialize_read(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_read(%M: index, %N: index, %O: index, %P: index) { @@ -72,37 +73,18 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L0:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L1:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L2:.*]] = select - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}} : index - // CHECK-NEXT: {{.*}} = select - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[L3:.*]] = select - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[L1]], %[[L2]], %[[L3]]] : memref - // CHECK-NEXT: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[VEC:.*]] = load %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK: %[[L0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[L3:.*]] = affine.apply 
#[[$ADD]](%[[I3]], %[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[SCAL:.*]] = load %{{.*}}[%[[L0]], %[[I1]], %[[I2]], %[[L3]]] : memref + // CHECK-NEXT: %[[RVEC:.*]] = vector.insertelement %[[SCAL]], %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // CHECK-NEXT: store %[[RVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } else { + // CHECK-NEXT: %[[CVEC:.*]] = vector.insertelement + // CHECK-NEXT: store %[[CVEC]], %[[ALLOC]][%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } @@ -132,7 +114,6 @@ func @materialize_read(%M: index, %N: index, %O: index, %P: index) { // ----- // CHECK: #[[$ADD:map[0-9]+]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECK: #[[$SUB:map[0-9]+]] = affine_map<()[s0] -> (s0 - 1)> // CHECK-LABEL:func @materialize_write(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: index) { func @materialize_write(%M: index, %N: index, %O: index, %P: index) { @@ -153,37 +134,15 @@ func @materialize_write(%M: index, %N: index, %O: index, %P: index) { // CHECK-NEXT: scf.for %[[I4:.*]] = %[[C0]] to %[[C3]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I5:.*]] = %[[C0]] to %[[C4]] step %[[C1]] { // CHECK-NEXT: scf.for %[[I6:.*]] = %[[C0]] to %[[C5]] step %[[C1]] { - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S0:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S1:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %{{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, %[[I2]], {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", %[[I2]], %[[C0]] : index - // CHECK-NEXT: %[[S2:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // - // CHECK-NEXT: {{.*}} = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) - // CHECK-NEXT: {{.*}} = affine.apply #[[$SUB]]()[%{{.*}}] - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = select {{.*}}, {{.*}}, {{.*}} : index - // CHECK-NEXT: {{.*}} = cmpi "slt", {{.*}}, %[[C0]] : index - // CHECK-NEXT: %[[S3:.*]] = select {{.*}}, %[[C0]], {{.*}} : index - // CHECK-NEXT: %[[VIDX:.*]] = index_cast %[[I4]] - // - // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> - // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> - // CHECK-NEXT: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[S2]], %[[S3]]] : memref + // CHECK: %[[VIDX:.*]] = index_cast %[[I4]] + // CHECK: %[[S0:.*]] = affine.apply #[[$ADD]](%[[I0]], %[[I4]]) + // CHECK: %[[S1:.*]] = affine.apply #[[$ADD]](%[[I1]], %[[I5]]) + // CHECK: %[[S3:.*]] = affine.apply #[[$ADD]](%[[I3]], %[[I6]]) + // CHECK-NEXT: scf.if + // CHECK-NEXT: %[[VEC:.*]] = load {{.*}}[%[[I6]], %[[I5]]] : memref<5x4xvector<3xf32>> + // CHECK-NEXT: %[[SCAL:.*]] = vector.extractelement %[[VEC]][%[[VIDX]] : i32] : vector<3xf32> + // 
CHECK: store %[[SCAL]], {{.*}}[%[[S0]], %[[S1]], %[[I2]], %[[S3]]] : memref + // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: } From 8d9c13f37d2081c11186718ae8b5aef8b507d152 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan Date: Tue, 8 Sep 2020 17:20:00 +0800 Subject: [PATCH 055/161] Revert "[PowerPC] Implement instruction clustering for stores" This reverts commit 3c0b3250230b3847a2a47dfeacfdb794c2285f02, (along with ea795304 and bb39eb9e) since it breaks test with UB sanitizer. --- llvm/lib/Target/PowerPC/PPC.td | 11 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 108 +------ llvm/lib/Target/PowerPC/PPCInstrInfo.h | 13 - llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 1 - llvm/lib/Target/PowerPC/PPCSubtarget.h | 2 - llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 4 - .../test/CodeGen/PowerPC/fusion-load-store.ll | 268 ------------------ .../PowerPC/pcrel-call-linkage-leaf.ll | 2 +- 8 files changed, 5 insertions(+), 404 deletions(-) delete mode 100644 llvm/test/CodeGen/PowerPC/fusion-load-store.ll diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 1b38a6f1d13d99..a617715d4bd86f 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -174,9 +174,6 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load", "HasAddisLoadFusion", "true", "Power8 Addis-Load fusion", [FeatureFusion]>; -def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", - "Target supports store clustering", - [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -348,12 +345,10 @@ def ProcessorFeatures { // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. - list FusionFeatures = [FeatureStoreFusion]; list P10AdditionalFeatures = - !listconcat(FusionFeatures, [ - DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, - FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, - FeaturePairedVectorMemops]); + [DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, + FeaturePCRelativeMemops, FeatureP10Vector, FeatureMMA, + FeaturePairedVectorMemops]; list P10SpecificFeatures = []; list P10InheritableFeatures = !listconcat(P9InheritableFeatures, P10AdditionalFeatures); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 8cb8c82e628334..2423bca42e8052 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2222,111 +2222,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return true; } -bool PPCInstrInfo::getMemOperandsWithOffsetWidth( - const MachineInstr &LdSt, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const { - const MachineOperand *BaseOp; - if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI)) - return false; - BaseOps.push_back(BaseOp); - return true; -} - -static bool isLdStSafeToCluster(const MachineInstr &LdSt, - const TargetRegisterInfo *TRI) { - // If this is a volatile load/store, don't mess with it. - if (LdSt.hasOrderedMemoryRef()) - return false; - - if (LdSt.getOperand(2).isFI()) - return true; - - assert(LdSt.getOperand(2).isReg() && "Expected a reg operand."); - // Can't cluster if the instruction modifies the base register - // or it is update form. e.g. 
ld r2,3(r2) - if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI)) - return false; - - return true; -} - -// Only cluster instruction pair that have the same opcode, and they are -// clusterable according to PowerPC specification. -static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc, - const PPCSubtarget &Subtarget) { - switch (FirstOpc) { - default: - return false; - case PPC::STD: - case PPC::STFD: - case PPC::STXSD: - case PPC::DFSTOREf64: - return FirstOpc == SecondOpc; - // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with - // 32bit and 64bit instruction selection. They are clusterable pair though - // they are different opcode. - case PPC::STW: - case PPC::STW8: - return SecondOpc == PPC::STW || SecondOpc == PPC::STW8; - } -} - -bool PPCInstrInfo::shouldClusterMemOps( - ArrayRef BaseOps1, - ArrayRef BaseOps2, unsigned NumLoads, - unsigned NumBytes) const { - - assert(BaseOps1.size() == 1 && BaseOps2.size() == 1); - const MachineOperand &BaseOp1 = *BaseOps1.front(); - const MachineOperand &BaseOp2 = *BaseOps2.front(); - assert((BaseOp1.isReg() || BaseOp1.isFI()) && - "Only base registers and frame indices are supported."); - - // The NumLoads means the number of loads that has been clustered. - // Don't cluster memory op if there are already two ops clustered at least. - if (NumLoads > 2) - return false; - - // Cluster the load/store only when they have the same base - // register or FI. - if ((BaseOp1.isReg() != BaseOp2.isReg()) || - (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) || - (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex())) - return false; - - // Check if the load/store are clusterable according to the PowerPC - // specification. - const MachineInstr &FirstLdSt = *BaseOp1.getParent(); - const MachineInstr &SecondLdSt = *BaseOp2.getParent(); - unsigned FirstOpc = FirstLdSt.getOpcode(); - unsigned SecondOpc = SecondLdSt.getOpcode(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Cluster the load/store only when they have the same opcode, and they are - // clusterable opcode according to PowerPC specification. - if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget)) - return false; - - // Can't cluster load/store that have ordered or volatile memory reference. - if (!isLdStSafeToCluster(FirstLdSt, TRI) || - !isLdStSafeToCluster(SecondLdSt, TRI)) - return false; - - int64_t Offset1 = 0, Offset2 = 0; - unsigned Width1 = 0, Width2 = 0; - const MachineOperand *Base1 = nullptr, *Base2 = nullptr; - if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) || - !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) || - Width1 != Width2) - return false; - - assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 && - "getMemOperandWithOffsetWidth return incorrect base op"); - // The caller should already have ordered FirstMemOp/SecondMemOp by offset. - assert(Offset1 <= Offset2 && "Caller should have ordered offsets."); - return Offset1 + Width1 == Offset2; -} - /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// @@ -4769,8 +4664,7 @@ bool PPCInstrInfo::getMemOperandWithOffsetWidth( return false; // Handle only loads/stores with base register followed by immediate offset. 
- if (!LdSt.getOperand(1).isImm() || - (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) + if (LdSt.getNumExplicitOperands() != 3) return false; if (!LdSt.getOperand(1).isImm() || (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI())) diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index 2f867b16aa24fb..75e8224892f4c5 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -494,19 +494,6 @@ class PPCInstrInfo : public PPCGenInstrInfo { int64_t &Offset, unsigned &Width, const TargetRegisterInfo *TRI) const; - /// Get the base operand and byte offset of an instruction that reads/writes - /// memory. - bool getMemOperandsWithOffsetWidth( - const MachineInstr &MI, SmallVectorImpl &BaseOps, - int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, - const TargetRegisterInfo *TRI) const override; - - /// Returns true if the two given memory operations should be scheduled - /// adjacent. - bool shouldClusterMemOps(ArrayRef BaseOps1, - ArrayRef BaseOps2, - unsigned NumLoads, unsigned NumBytes) const override; - /// Return true if two MIs access different memory addresses and false /// otherwise bool diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 05922dbb38fc6a..8021cfa4a18c6e 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -108,7 +108,6 @@ void PPCSubtarget::initializeEnvironment() { HasHTM = false; HasFloat128 = false; HasFusion = false; - HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index 0a134bb83ed2fa..76b43dfc7a723f 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -137,7 +137,6 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasHTM; bool HasFloat128; bool HasFusion; - bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; bool IsISA3_0; @@ -309,7 +308,6 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool isISA3_1() const { return IsISA3_1; } bool useLongCalls() const { return UseLongCalls; } bool hasFusion() const { return HasFusion; } - bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } bool needsSwapsForVSXMemOps() const { diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index c5671d6c73e055..ea9b37de6ff390 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -271,8 +271,6 @@ static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { std::make_unique(C)); // add DAG Mutations here. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); @@ -287,8 +285,6 @@ static ScheduleDAGInstrs *createPPCPostMachineScheduler( std::make_unique(C) : std::make_unique(C), true); // add DAG Mutations here. 
- if (ST.hasStoreFusion()) - DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createPowerPCMacroFusionDAGMutation()); return DAG; diff --git a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll b/llvm/test/CodeGen/PowerPC/fusion-load-store.ll deleted file mode 100644 index 75b2eca2168c0f..00000000000000 --- a/llvm/test/CodeGen/PowerPC/fusion-load-store.ll +++ /dev/null @@ -1,268 +0,0 @@ -; Test if several consecutive loads/stores can be clustered(fused) by scheduler. The -; scheduler will print "Cluster ld/st SU(x) - SU(y)" if SU(x) and SU(y) are fused. - -; REQUIRES: asserts -; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \ -; RUN: -mattr=-paired-vector-memops,-pcrelative-memops -verify-misched \ -; RUN: -debug-only=machine-scheduler 2>&1 | FileCheck %s - -define i64 @store_i64(i64* nocapture %P, i64 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 16 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 8 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 24 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 - store i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 - store i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 - store i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 - store i64 %v, i64* %arrayidx3 - ret i64 %v -} - -define i32 @store_i32(i32* nocapture %P, i32 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, 52 -; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, 48 -; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, 44 -; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, 56 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], 48 -; CHECK: SU([[SU1]]): STW renamable $r[[REG]], 44 -; CHECK: SU([[SU2]]): STW renamable $r[[REG]], 52 -; CHECK: SU([[SU3]]): STW renamable $r[[REG]], 56 - %arrayidx = getelementptr inbounds i32, i32* %P, i32 13 - store i32 %v, i32* %arrayidx - %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 12 - store i32 %v, i32* %arrayidx1 - %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 11 - store i32 %v, i32* %arrayidx2 - %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 14 - store i32 %v, i32* %arrayidx3 - ret i32 %v -} - -define void @store_i64_neg(i64* nocapture %P, i64 %v) #0 { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) 
- SU([[SU5:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, -24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, -8 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, -16 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, -32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i64_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], -8 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], -16 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], -24 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], -32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 -3 - store i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 -1 - store i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 -2 - store i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 -4 - store i64 %v, i64* %arrayidx3 - ret void -} - -define void @store_i32_neg(i32* nocapture %P, i32 %v) #0 { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU2]]): STW %[[REG:[0-9]+]].sub_32:g8rc, -12 -; CHECK: SU([[SU3]]): STW %[[REG]].sub_32:g8rc, -4 -; CHECK: SU([[SU4]]): STW %[[REG]].sub_32:g8rc, -8 -; CHECK: SU([[SU5]]): STW %[[REG]].sub_32:g8rc, -16 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_neg:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK:SU([[SU0]]): STW renamable $r[[REG:[0-9]+]], -4 -; CHECK:SU([[SU1]]): STW renamable $r[[REG]], -8 -; CHECK:SU([[SU2]]): STW renamable $r[[REG]], -12 -; CHECK:SU([[SU3]]): STW renamable $r[[REG]], -16 - %arrayidx = getelementptr inbounds i32, i32* %P, i32 -3 - store i32 %v, i32* %arrayidx - %arrayidx1 = getelementptr inbounds i32, i32* %P, i32 -1 - store i32 %v, i32* %arrayidx1 - %arrayidx2 = getelementptr inbounds i32, i32* %P, i32 -2 - store i32 %v, i32* %arrayidx2 - %arrayidx3 = getelementptr inbounds i32, i32* %P, i32 -4 - store i32 %v, i32* %arrayidx3 - ret void -} - -define void @store_double(double* nocapture %P, double %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_double:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU2]]): DFSTOREf64 %[[REG:[0-9]+]]:vsfrc, 24 -; CHECK: SU([[SU3]]): DFSTOREf64 %[[REG]]:vsfrc, 8 -; CHECK: SU([[SU4]]): DFSTOREf64 %[[REG]]:vsfrc, 16 -; CHECK: SU([[SU5]]): DFSTOREf64 %[[REG]]:vsfrc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_double:%bb.0 -; CHECK: Cluster ld/st SU([[SU0:[0-9]+]]) - SU([[SU1:[0-9]+]]) -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU0]]): STFD renamable $f[[REG:[0-9]+]], 8 -; CHECK: SU([[SU1]]): STFD renamable $f[[REG]], 16 -; CHECK: SU([[SU2]]): STFD renamable $f[[REG]], 24 -; CHECK: SU([[SU3]]): STFD renamable $f[[REG]], 32 - %arrayidx = getelementptr inbounds double, double* %P, i64 3 - store double %v, double* %arrayidx - %arrayidx1 = getelementptr inbounds double, double* %P, i64 1 - store double %v, double* %arrayidx1 - %arrayidx2 = getelementptr inbounds double, double* %P, i64 2 
- store double %v, double* %arrayidx2 - %arrayidx3 = getelementptr inbounds double, double* %P, i64 4 - store double %v, double* %arrayidx3 - ret void -} - -define void @store_float(float* nocapture %P, float %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_float:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU2]]): DFSTOREf32 %[[REG:[0-9]+]]:vssrc, 12 -; CHECK: SU([[SU3]]): DFSTOREf32 %[[REG]]:vssrc, 4 -; CHECK: SU([[SU4]]): DFSTOREf32 %[[REG]]:vssrc, 8 -; CHECK: SU([[SU5]]): DFSTOREf32 %[[REG]]:vssrc, 16 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_float:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU0]]): STFS renamable $f[[REG:[0-9]+]], 12 -; CHECK: SU([[SU1]]): STFS renamable $f[[REG]], 4 -; CHECK: SU([[SU2]]): STFS renamable $f[[REG]], 8 -; CHECK: SU([[SU3]]): STFS renamable $f[[REG]], 16 - %arrayidx = getelementptr inbounds float, float* %P, i64 3 - store float %v, float* %arrayidx - %arrayidx1 = getelementptr inbounds float, float* %P, i64 1 - store float %v, float* %arrayidx1 - %arrayidx2 = getelementptr inbounds float, float* %P, i64 2 - store float %v, float* %arrayidx2 - %arrayidx3 = getelementptr inbounds float, float* %P, i64 4 - store float %v, float* %arrayidx3 - ret void -} - -; Cannot fuse the store/load if there is volatile in between -define i64 @store_volatile(i64* nocapture %P, i64 %v) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_volatile:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU2]]): STD %[[REG:[0-9]+]]:g8rc, 24 -; CHECK: SU([[SU3]]): STD %[[REG]]:g8rc, 16 -; CHECK: SU([[SU4]]): STD %[[REG]]:g8rc, 8 -; CHECK: SU([[SU5]]): STD %[[REG]]:g8rc, 32 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_volatile:%bb.0 -; CHECK-NOT: Cluster ld/st -; CHECK: SU([[SU0]]): STD renamable $x[[REG:[0-9]+]], 24 -; CHECK: SU([[SU1]]): STD renamable $x[[REG]], 16 -; CHECK: SU([[SU2]]): STD renamable $x[[REG]], 8 -; CHECK: SU([[SU3]]): STD renamable $x[[REG]], 32 - %arrayidx = getelementptr inbounds i64, i64* %P, i64 3 - store volatile i64 %v, i64* %arrayidx - %arrayidx1 = getelementptr inbounds i64, i64* %P, i64 2 - store volatile i64 %v, i64* %arrayidx1 - %arrayidx2 = getelementptr inbounds i64, i64* %P, i64 1 - store volatile i64 %v, i64* %arrayidx2 - %arrayidx3 = getelementptr inbounds i64, i64* %P, i64 4 - store volatile i64 %v, i64* %arrayidx3 - ret i64 %v -} - -@p = common local_unnamed_addr global [100 x i32] zeroinitializer, align 4 - -define void @store_i32_stw_stw8(i32 signext %m, i32 signext %n) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU8:[0-9]+]]) -; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 24 -; CHECK: SU([[SU8]]): STW %{{[0-9]+}}:gprc, 20 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU5:[0-9]+]]) - SU([[SU6:[0-9]+]]) -; CHECK: SU([[SU5]]): STW8 renamable $x{{[0-9]+}}, 24 -; CHECK: SU([[SU6]]): STW renamable $r{{[0-9]+}}, 20 - store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 - store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 - %add = add nsw i32 %n, %m - store i32 %add, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 5), align 4 - ret void -} - -define void @store_i32_stw8(i32 signext %m, i32 signext %n) { 
-entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) -; CHECK: SU([[SU4]]): STW8 %{{[0-9]+}}:g8rc, 24 -; CHECK: SU([[SU5]]): STW8 %{{[0-9]+}}:g8rc, 28 -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_i32_stw8:%bb.0 -; CHECK: Cluster ld/st SU([[SU3:[0-9]+]]) - SU([[SU4:[0-9]+]]) -; CHECK: SU([[SU3]]): STW8 renamable $x{{[0-9]+}}, 24 -; CHECK: SU([[SU4]]): STW8 renamable $x{{[0-9]+}}, 28 - store i32 9, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 6), align 4 - store i32 %n, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @p, i64 0, i64 7), align 4 - ret void -} - -declare void @bar(i64*) - -define void @store_frame_index(i32 %a, i32 %b) { -entry: -; CHECK: ********** MI Scheduling ********** -; CHECK-LABEL: store_frame_index:%bb.0 -; CHECK: Cluster ld/st SU([[SU2:[0-9]+]]) - SU([[SU3:[0-9]+]]) -; CHECK: SU([[SU2]]): STD %{{[0-9]+}}:g8rc, 0, %stack.0.buf -; CHECK: SU([[SU3]]): STD %{{[0-9]+}}:g8rc, 8, %stack.0.buf - %buf = alloca [8 x i64], align 8 - %0 = bitcast [8 x i64]* %buf to i8* - %conv = zext i32 %a to i64 - %arrayidx = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 0 - store i64 %conv, i64* %arrayidx, align 8 - %conv1 = zext i32 %b to i64 - %arrayidx2 = getelementptr inbounds [8 x i64], [8 x i64]* %buf, i64 0, i64 1 - store i64 %conv1, i64* %arrayidx2, align 8 - call void @bar(i64* nonnull %arrayidx) - ret void -} diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 1623889200848a..9141fdc735a0ed 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -104,7 +104,6 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-P9-NOT: .localentry ; CHECK-ALL: # %bb.0: # %entry ; CHECK-S-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r11, r4, r3 ; CHECK-S-NEXT: sub r29, r8, r9 ; CHECK-S-NEXT: add r9, r10, r9 @@ -120,6 +119,7 @@ define dso_local signext i32 @X2IsCallerSaved(i32 signext %a, i32 signext %b, i3 ; CHECK-S-NEXT: mullw r3, r3, r7 ; CHECK-S-NEXT: sub r2, r6, r7 ; CHECK-S-NEXT: mullw r3, r3, r8 +; CHECK-S-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-S-NEXT: add r30, r8, r7 ; CHECK-S-NEXT: mullw r3, r3, r2 ; CHECK-S-NEXT: mullw r3, r3, r30 From 7aabb6ad7764366fd3150d18b16da9aef35e6492 Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Mon, 7 Sep 2020 10:39:14 +0100 Subject: [PATCH 056/161] [ARM][LowOverheadLoops] Remove modifications to the correct element count register After my patch at D86087, code that now uses the mov operand rather than the vctp operand will no longer remove modifications to the vctp operand as they should. This patch fixes that by explicitly removing modifications to the vctp operand rather than the register used as the element count. 
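
In short, the reaching-def query that drives the removal is now keyed on the
VCTP's own element-count operand. Condensed from the ARMLowOverheadLoops.cpp
hunk below (a sketch for readers of this message only; the unified diff is
authoritative):

  // Before: keyed on whichever register the pass had cached as the element
  // count, which after D86087 can be the mov operand instead of the vctp
  // operand, so updates feeding the vctp were left in place.
  auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements);

  // After: always key on the VCTP instruction's element-count operand, so
  // the modifications that get removed are exactly the ones feeding the VCTP.
  auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(),
                                         VCTP->getOperand(1).getReg());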
--- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 7 ++++++- .../CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir | 5 ++--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index a98590fd79c685..69e188fe5f888c 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -527,7 +527,12 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { }; MBB = VCTP->getParent(); - if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), NumElements)) { + // Remove modifications to the element count since they have no purpose in a + // tail predicated loop. Explicitly refer to the vctp operand no matter which + // register NumElements has been assigned to, since that is what the + // modifications will be using + if (auto *Def = RDA.getUniqueReachingMIDef(&MBB->back(), + VCTP->getOperand(1).getReg())) { SmallPtrSet ElementChain; SmallPtrSet Ignore = { VCTP }; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir index 9a5856335dfc67..210eae9e643509 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -173,11 +173,10 @@ body: | ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 - ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: dead $r3 = tMOVr $r1, 14 /* CC::al */, $noreg ; CHECK: bb.3.do.body: ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) - ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2 ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4) ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll index 5a370e5f96e76c..1cf101ea5d5f1f 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -27,7 +27,6 @@ define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: .LBB0_3: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vldrw.u32 q2, [r0], #16 ; CHECK-NEXT: vsub.f32 q2, q2, q1 ; CHECK-NEXT: vfma.f32 q0, q2, q2 From 83d82d1fb1cfac06257ebbd7c063a3d2d1af20fb Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 8 Sep 2020 09:42:25 +0000 Subject: [PATCH 057/161] [mlir] Fix of broken build on windows caused by using uint --- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index cd36c753b6f691..51781af9cb3049 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -371,7 +371,7 @@ LogicalResult LinalgCopyVTWForwardingPattern::matchAndRewrite( template LogicalResult ConvOpVectorization::matchAndRewrite( ConvOp op, PatternRewriter &rewriter) const { - const uint dimSize = 3; + const unsigned dimSize = 3; Location loc = op.getLoc(); MLIRContext *context = op.getContext(); edsc::ScopedContext scope(rewriter, loc); @@ -402,8 +402,8 @@ LogicalResult ConvOpVectorization::matchAndRewrite( Value kernel = op.getInput(1); Value output = op.getOutputBuffer(0); - uint rank = inShapeType.getRank(); - uint numDims = mapping.size(); + unsigned rank = inShapeType.getRank(); + unsigned numDims = mapping.size(); Type elemType = inShapeType.getElementType(); auto map = AffineMap::get(rank, 0, mapping, context); From 2325d6b42f096bf93d2ab0bed7096759e5c96ce8 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 27 Aug 2020 09:43:14 +0000 Subject: [PATCH 058/161] [SyntaxTree] Ignore implicit non-leaf `CXXConstructExpr` Differential Revision: https://reviews.llvm.org/D86699 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 27 +- .../Tooling/Syntax/BuildTreeTest.cpp | 324 ++++++++++++++++-- 2 files changed, 325 insertions(+), 26 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index a9f326439a2a57..e5389ae4eff473 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -13,6 +13,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeLoc.h" @@ -44,8 +45,28 @@ using namespace clang; +// Ignores the implicit `CXXConstructExpr` for copy/move constructor calls +// generated by the compiler, as well as in implicit conversions like the one +// wrapping `1` in `X x = 1;`. +static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { + if (auto *C = dyn_cast(E)) { + auto NumArgs = C->getNumArgs(); + if (NumArgs == 1 || (NumArgs > 1 && isa(C->getArg(1)))) { + Expr *A = C->getArg(0); + if (C->getParenOrBraceRange().isInvalid()) + return A; + } + } + return E; +} + +static Expr *IgnoreImplicit(Expr *E) { + return IgnoreExprNodes(E, IgnoreImplicitSingleStep, + IgnoreImplicitConstructorSingleStep); +} + LLVM_ATTRIBUTE_UNUSED -static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } +static bool isImplicitExpr(Expr *E) { return IgnoreImplicit(E) != E; } namespace { /// Get start location of the Declarator from the TypeLoc. 
@@ -740,7 +761,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { for (auto *D : DS->decls()) Builder.noticeDeclWithoutSemicolon(D); } else if (auto *E = dyn_cast_or_null(S)) { - return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); + return RecursiveASTVisitor::TraverseStmt(IgnoreImplicit(E)); } return RecursiveASTVisitor::TraverseStmt(S); } @@ -1579,7 +1600,7 @@ void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { if (!Child) return; - Child = Child->IgnoreImplicit(); + Child = IgnoreImplicit(Child); syntax::Tree *ChildNode = Mapping.find(Child); assert(ChildNode != nullptr); diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index aab20008a49748..fe89e0d7d1a2c1 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -1745,19 +1745,15 @@ TEST_P(SyntaxTreeTest, OverloadedOperator_Plus) { struct X { friend X operator+(X, const X&); }; -// FIXME: Remove additional `UnknownExpression` wrapping `x`. For that, ignore -// implicit copy constructor called on `x`. This should've been ignored already, -// as we `IgnoreImplicit` when traversing an `Stmt`. void test(X x, X y) { [[x + y]]; } )cpp", {R"txt( BinaryOperatorExpression Expression -|-UnknownExpression LeftHandSide -| `-IdExpression -| `-UnqualifiedId UnqualifiedId -| `-'x' +|-IdExpression LeftHandSide +| `-UnqualifiedId UnqualifiedId +| `-'x' |-'+' OperatorToken `-IdExpression RightHandSide `-UnqualifiedId UnqualifiedId @@ -3821,26 +3817,137 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, InitDeclarator_Equal) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { S(int);}; +void test() { + [[S s = 1]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, InitDeclarator_Brace) { if (!GetParam().isCXX11OrLater()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( -int a {}; +struct S { + S(); + S(int); + S(int, float); +}; +void test(){ + // FIXME: 's...' is a declarator and '{...}' is initializer + [[S s0{}]]; + [[S s1{1}]]; + [[S s2{1, 2.}]]; +} )cpp", - R"txt( -TranslationUnit Detached -`-SimpleDeclaration - |-'int' - |-SimpleDeclarator Declarator - | |-'a' - | `-UnknownExpression - | `-UnknownExpression - | |-'{' - | `-'}' - `-';' -)txt")); + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s0' + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s1' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + `-UnknownExpression + |-'s2' + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' 
LiteralToken + `-'}' +)txt"})); +} + +TEST_P(SyntaxTreeTest, InitDeclarator_EqualBrace) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(); + S(int); + S(int, float); +}; +void test() { + // FIXME: '= {...}' is initializer + [[S s0 = {}]]; + [[S s1 = {1}]]; + [[S s2 = {1, 2.}]]; +} +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s0' + |-'=' + `-UnknownExpression + |-'{' + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s1' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + `-'}' + )txt", + R"txt( +SimpleDeclaration +|-'S' +`-SimpleDeclarator Declarator + |-'s2' + |-'=' + `-UnknownExpression + |-'{' + |-IntegerLiteralExpression + | `-'1' LiteralToken + |-',' + |-FloatingLiteralExpression + | `-'2.' LiteralToken + `-'}' +)txt"})); } TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { @@ -3851,15 +3958,134 @@ TEST_P(SyntaxTreeTest, InitDeclarator_Paren) { R"cpp( struct S { S(int); + S(int, float); }; -[[S s(1);]] +// FIXME: 's...' is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] )cpp", {R"txt( SimpleDeclaration |-'S' |-SimpleDeclarator Declarator | `-UnknownExpression -| |-'s' +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +void TakeX(const X&); +void test() { + [[TakeX(1)]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'TakeX' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen +)txt"})); +} + +TEST_P(SyntaxTreeTest, ImplicitConversion_Return) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X CreateX(){ + [[return 1;]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-IntegerLiteralExpression ReturnValue +| `-'1' LiteralToken +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_ZeroArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(); +}; +X test() { + [[return X();]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, ConstructorCall_OneArgument) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int); +}; +X test() { + // FIXME: Remove `UnknownExpression` due to implicit `CXXFunctionalCastExpr` + [[return X(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| `-UnknownExpression +| |-'X' | |-'(' | |-IntegerLiteralExpression | | `-'1' LiteralToken @@ -3868,6 +4094,58 @@ SimpleDeclaration )txt"})); } +TEST_P(SyntaxTreeTest, ConstructorCall_MultipleArguments) { + if (!GetParam().isCXX()) { + return; + } + 
EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int, char); +}; +X test() { + [[return X(1, '2');]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-CharacterLiteralExpression +| | `-''2'' LiteralToken +| `-')' +`-';' +)txt"})); +} + +TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +float test() { + [[return float(1);]] +} +)cpp", + {R"txt( +ReturnStatement Statement +|-'return' IntroducerKeyword +|-UnknownExpression ReturnValue +| |-'float' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' +)txt"})); +} + TEST_P(SyntaxTreeTest, ArrayDeclarator_Simple) { EXPECT_TRUE(treeDumpEqual( R"cpp( From 46f4439dc9bf9b8cfee0001b6752c3d074c83b00 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Thu, 27 Aug 2020 09:44:09 +0000 Subject: [PATCH 059/161] [SyntaxTree] Ignore implicit leaf `CXXConstructExpr` Differential Revision: https://reviews.llvm.org/D86700 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 8 ++++++++ clang/unittests/Tooling/Syntax/BuildTreeTest.cpp | 15 ++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index e5389ae4eff473..72083eeefa31cb 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -1132,6 +1132,14 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return true; } + bool WalkUpFromCXXConstructExpr(CXXConstructExpr *S) { + // Ignore the implicit calls to default constructors. + if ((S->getNumArgs() == 0 || isa(S->getArg(0))) && + S->getParenOrBraceRange().isInvalid()) + return true; + return RecursiveASTVisitor::WalkUpFromCXXConstructExpr(S); + } + bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { // To construct a syntax tree of the same shape for calls to built-in and // user-defined operators, ignore the `DeclRefExpr` that refers to the diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index fe89e0d7d1a2c1..00e18057d7be02 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -548,9 +548,6 @@ namespace n { struct S { }; } void test() { - // FIXME: Remove the `UnknownExpression` wrapping `s1` and `s2`. This - // `UnknownExpression` comes from a leaf `CXXConstructExpr` in the - // ClangAST. We need to ignore leaf implicit nodes. 
[[::n::S s1]]; [[n::S s2]]; } @@ -564,8 +561,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -575,8 +571,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } @@ -608,8 +603,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s1' + `-'s1' )txt", R"txt( SimpleDeclaration @@ -623,8 +617,7 @@ SimpleDeclaration | `-'::' ListDelimiter |-'S' `-SimpleDeclarator Declarator - `-UnknownExpression - `-'s2' + `-'s2' )txt"})); } From 134455a07c1f1de4cff62a6afb4ccd98b98343ec Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 7 Sep 2020 08:40:49 +0000 Subject: [PATCH 060/161] [SyntaxTree] Ignore implicit `CXXFunctionalCastExpr` wrapping constructor Differential Revision: https://reviews.llvm.org/D87229 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 19 ++++++++++++++++++- .../Tooling/Syntax/BuildTreeTest.cpp | 12 +++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 72083eeefa31cb..bb2b1494793a1f 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -14,6 +14,7 @@ #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/IgnoreExpr.h" +#include "clang/AST/OperationKinds.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeLoc.h" @@ -60,9 +61,25 @@ static Expr *IgnoreImplicitConstructorSingleStep(Expr *E) { return E; } +// In: +// struct X { +// X(int) +// }; +// X x = X(1); +// Ignores the implicit `CXXFunctionalCastExpr` that wraps +// `CXXConstructExpr X(1)`. +static Expr *IgnoreCXXFunctionalCastExprWrappingConstructor(Expr *E) { + if (auto *F = dyn_cast(E)) { + if (F->getCastKind() == CK_ConstructorConversion) + return F->getSubExpr(); + } + return E; +} + static Expr *IgnoreImplicit(Expr *E) { return IgnoreExprNodes(E, IgnoreImplicitSingleStep, - IgnoreImplicitConstructorSingleStep); + IgnoreImplicitConstructorSingleStep, + IgnoreCXXFunctionalCastExprWrappingConstructor); } LLVM_ATTRIBUTE_UNUSED diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 00e18057d7be02..7a106e9297b916 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -4069,7 +4069,6 @@ struct X { X(int); }; X test() { - // FIXME: Remove `UnknownExpression` due to implicit `CXXFunctionalCastExpr` [[return X(1);]] } )cpp", @@ -4077,12 +4076,11 @@ X test() { ReturnStatement Statement |-'return' IntroducerKeyword |-UnknownExpression ReturnValue -| `-UnknownExpression -| |-'X' -| |-'(' -| |-IntegerLiteralExpression -| | `-'1' LiteralToken -| `-')' +| |-'X' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' `-';' )txt"})); } From f5087d5c7248104b6580c7b079ed5f227332c2ef Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Mon, 7 Sep 2020 17:47:09 +0000 Subject: [PATCH 061/161] [SyntaxTree] Fix crash on functions with default arguments. 
* Do not visit `CXXDefaultArgExpr` * To build `CallArguments` nodes, just go through non-default arguments Differential Revision: https://reviews.llvm.org/D87249 --- clang/lib/Tooling/Syntax/BuildTree.cpp | 15 +- .../Tooling/Syntax/BuildTreeTest.cpp | 195 ++++++++++++++++++ 2 files changed, 209 insertions(+), 1 deletion(-) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index bb2b1494793a1f..1942290b5abc58 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -154,6 +154,13 @@ struct GetStartLoc : TypeLocVisitor { }; } // namespace +static CallExpr::arg_range dropDefaultArgs(CallExpr::arg_range Args) { + auto firstDefaultArg = std::find_if(Args.begin(), Args.end(), [](auto it) { + return isa(it); + }); + return llvm::make_range(Args.begin(), firstDefaultArg); +} + static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { switch (E.getOperator()) { // Comparison @@ -1111,7 +1118,11 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return true; } - syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) { + /// Builds `CallArguments` syntax node from arguments that appear in source + /// code, i.e. not default arguments. + syntax::CallArguments * + buildCallArguments(CallExpr::arg_range ArgsAndDefaultArgs) { + auto Args = dropDefaultArgs(ArgsAndDefaultArgs); for (const auto &Arg : Args) { Builder.markExprChild(Arg, syntax::NodeRole::ListElement); const auto *DelimiterToken = @@ -1233,6 +1244,8 @@ class BuildTreeVisitor : public RecursiveASTVisitor { } } + bool WalkUpFromCXXDefaultArgExpr(CXXDefaultArgExpr *S) { return true; } + bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { auto Tokens = Builder.getDeclarationRange(S); if (Tokens.front().kind() == tok::coloncolon) { diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp index 7a106e9297b916..225885437267be 100644 --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -2733,6 +2733,54 @@ CallExpression Expression )txt"})); } +TEST_P(SyntaxTreeTest, CallExpression_DefaultArguments) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +void f(int i = 1, char c = '2'); +void test() { + [[f()]]; + [[f(1)]]; + [[f(1, '2')]]; +} +)cpp", + {R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| `-IntegerLiteralExpression ListElement +| `-'1' LiteralToken +`-')' CloseParen + )txt", + R"txt( +CallExpression Expression +|-IdExpression Callee +| `-UnqualifiedId UnqualifiedId +| `-'f' +|-'(' OpenParen +|-CallArguments Arguments +| |-IntegerLiteralExpression ListElement +| | `-'1' LiteralToken +| |-',' ListDelimiter +| `-CharacterLiteralExpression ListElement +| `-''2'' LiteralToken +`-')' CloseParen +)txt"})); +} + TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { EXPECT_TRUE(treeDumpEqual( R"cpp( @@ -3986,6 +4034,56 @@ SimpleDeclaration )txt"})); } +TEST_P(SyntaxTreeTest, InitDeclarator_Paren_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct S { + S(int i = 1, float = 2.); +}; +[[S s0;]] +// FIXME: 's...' 
is a declarator and '(...)' is initializer +[[S s1(1);]] +[[S s2(1, 2.);]] +)cpp", + {R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-'s0' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s1' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| `-')' +`-';' + )txt", + R"txt( +SimpleDeclaration +|-'S' +|-SimpleDeclarator Declarator +| `-UnknownExpression +| |-'s2' +| |-'(' +| |-IntegerLiteralExpression +| | `-'1' LiteralToken +| |-',' +| |-FloatingLiteralExpression +| | `-'2.' LiteralToken +| `-')' +`-';' +)txt"})); +} + TEST_P(SyntaxTreeTest, ImplicitConversion_Argument) { if (!GetParam().isCXX()) { return; @@ -4114,6 +4212,48 @@ ReturnStatement Statement )txt"})); } +TEST_P(SyntaxTreeTest, ConstructorCall_DefaultArguments) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +struct X { + X(int i = 1, char c = '2'); +}; +X test() { + auto x0 = [[X()]]; + auto x1 = [[X(1)]]; + auto x2 = [[X(1, '2')]]; +} +)cpp", + {R"txt( +UnknownExpression +|-'X' +|-'(' +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +`-')' +)txt", + R"txt( +UnknownExpression +|-'X' +|-'(' +|-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' +|-CharacterLiteralExpression +| `-''2'' LiteralToken +`-')' +)txt"})); +} + TEST_P(SyntaxTreeTest, TypeConversion_FunctionalNotation) { if (!GetParam().isCXX()) { return; @@ -4375,6 +4515,61 @@ TranslationUnit Detached )txt")); } +TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InFreeFunctions_Default_One) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int func1([[int a = 1]]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +`-SimpleDeclaration ListElement + |-'int' + `-SimpleDeclarator Declarator + |-'a' + |-'=' + `-IntegerLiteralExpression + `-'1' LiteralToken +)txt"})); +} + +TEST_P(SyntaxTreeTest, + ParametersAndQualifiers_InFreeFunctions_Default_Multiple) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqualOnAnnotations( + R"cpp( +int func2([[int *ap, int a = 1, char c = '2']]); +)cpp", + {R"txt( +ParameterDeclarationList Parameters +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'*' +| `-'ap' +|-',' ListDelimiter +|-SimpleDeclaration ListElement +| |-'int' +| `-SimpleDeclarator Declarator +| |-'a' +| |-'=' +| `-IntegerLiteralExpression +| `-'1' LiteralToken +|-',' ListDelimiter +`-SimpleDeclaration ListElement + |-'char' + `-SimpleDeclarator Declarator + |-'c' + |-'=' + `-CharacterLiteralExpression + `-''2'' LiteralToken +)txt"})); +} + TEST_P(SyntaxTreeTest, ParametersAndQualifiers_InVariadicFunctionTemplate_ParameterPack) { if (!GetParam().isCXX11OrLater() || GetParam().hasDelayedTemplateParsing()) { From 307dc7b236924b5eeb5bf46b725a67dcb41bcd89 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 11:57:50 +0200 Subject: [PATCH 062/161] [mlir][VectorOps] Clean up outdated comments. NFCI. 
While there - De-templatify code that can use function_ref - Make BoundCaptures usable when they're const - Address post-submit review comment (static function into global namespace) --- .../mlir/Dialect/StandardOps/EDSC/Builders.h | 18 ++--- .../Conversion/VectorToSCF/VectorToSCF.cpp | 73 +++++-------------- 2 files changed, 26 insertions(+), 65 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h index 36df24f60c7045..ffb3ba30b699a3 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Builders.h @@ -20,10 +20,10 @@ namespace edsc { class BoundsCapture { public: unsigned rank() const { return lbs.size(); } - Value lb(unsigned idx) { return lbs[idx]; } - Value ub(unsigned idx) { return ubs[idx]; } - int64_t step(unsigned idx) { return steps[idx]; } - std::tuple range(unsigned idx) { + Value lb(unsigned idx) const { return lbs[idx]; } + Value ub(unsigned idx) const { return ubs[idx]; } + int64_t step(unsigned idx) const { return steps[idx]; } + std::tuple range(unsigned idx) const { return std::make_tuple(lbs[idx], ubs[idx], steps[idx]); } void swapRanges(unsigned i, unsigned j) { @@ -34,9 +34,9 @@ class BoundsCapture { std::swap(steps[i], steps[j]); } - ArrayRef getLbs() { return lbs; } - ArrayRef getUbs() { return ubs; } - ArrayRef getSteps() { return steps; } + ArrayRef getLbs() const { return lbs; } + ArrayRef getUbs() const { return ubs; } + ArrayRef getSteps() const { return steps; } protected: SmallVector lbs; @@ -52,8 +52,6 @@ class BoundsCapture { class MemRefBoundsCapture : public BoundsCapture { public: explicit MemRefBoundsCapture(Value v); - MemRefBoundsCapture(const MemRefBoundsCapture &) = default; - MemRefBoundsCapture &operator=(const MemRefBoundsCapture &) = default; unsigned fastestVarying() const { return rank() - 1; } @@ -69,8 +67,6 @@ class VectorBoundsCapture : public BoundsCapture { public: explicit VectorBoundsCapture(Value v); explicit VectorBoundsCapture(VectorType t); - VectorBoundsCapture(const VectorBoundsCapture &) = default; - VectorBoundsCapture &operator=(const VectorBoundsCapture &) = default; private: Value base; diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 801ead825ffc94..0eb46f7ba3cfb6 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -108,8 +108,10 @@ class NDTransferOpHelper { private: /// Creates the loop nest on the "major" dimensions and calls the /// `loopBodyBuilder` lambda in the context of the loop nest. - template - void emitLoops(Lambda loopBodyBuilder); + void + emitLoops(llvm::function_ref + loopBodyBuilder); /// Common state to lower vector transfer ops. 
PatternRewriter &rewriter; @@ -129,10 +131,13 @@ class NDTransferOpHelper { VectorType minorVectorType; // vector<(minor_dims) x type> MemRefType memRefMinorVectorType; // memref> }; +} // namespace template -template -void NDTransferOpHelper::emitLoops(Lambda loopBodyBuilder) { +void NDTransferOpHelper::emitLoops( + llvm::function_ref + loopBodyBuilder) { /// Loop nest operates on the major dimensions MemRefBoundsCapture memrefBoundsCapture(xferOp.memref()); @@ -195,7 +200,7 @@ static Value emitInBoundsCondition(PatternRewriter &rewriter, VectorTransferOpInterface xferOp, unsigned leadingRank, ValueRange majorIvs, ValueRange majorOffsets, - MemRefBoundsCapture &memrefBounds, + const MemRefBoundsCapture &memrefBounds, SmallVectorImpl &majorIvsPlusOffsets) { Value inBoundsCondition; majorIvsPlusOffsets.reserve(majorIvs.size()); @@ -242,7 +247,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { /// Lambda to load 1-D vector in the current loop ivs + offset context. auto load1DVector = [&](ValueRange majorIvsPlusOffsets) -> Value { SmallVector indexing; @@ -341,7 +346,7 @@ LogicalResult NDTransferOpHelper::doReplace() { emitLoops([&](ValueRange majorIvs, ValueRange leadingOffsets, ValueRange majorOffsets, ValueRange minorOffsets, - MemRefBoundsCapture &memrefBounds) { + const MemRefBoundsCapture &memrefBounds) { // Lower to 1-D vector_transfer_write and let recursion handle it. auto emitTransferWrite = [&](ValueRange majorIvsPlusOffsets) { SmallVector indexing; @@ -390,8 +395,6 @@ LogicalResult NDTransferOpHelper::doReplace() { return success(); } -} // namespace - /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of @@ -422,8 +425,6 @@ static int computeCoalescedIndex(TransferOpTy transfer) { return coalescedIdx; } -namespace mlir { - template VectorTransferRewriter::VectorTransferRewriter( VectorTransferToSCFOptions options, MLIRContext *context) @@ -443,7 +444,7 @@ MemRefType VectorTransferRewriter::tmpMemRefType( static void emitWithBoundsChecks( PatternRewriter &rewriter, VectorTransferOpInterface transfer, - ValueRange ivs, MemRefBoundsCapture &memRefBoundsCapture, + ValueRange ivs, const MemRefBoundsCapture &memRefBoundsCapture, function_ref)> inBoundsFun, function_ref)> outOfBoundsFun = nullptr) { // Permute the incoming indices according to the permutation map. @@ -499,43 +500,13 @@ static void emitWithBoundsChecks( /// 1. local memory allocation; /// 2. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (with padding). /// 3. vector_load from local buffer (viewed as a memref<1 x vector>); /// 4. local memory deallocation. /// /// Lowers the data transfer part of a TransferReadOp while ensuring no /// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be read multiple -/// times and concurrently. 
-/// -/// Important notes about clipping and "full-tiles only" abstraction: -/// ================================================================= -/// When using clipping for dealing with boundary conditions, the same edge -/// value will appear multiple times (a.k.a edge padding). This is fine if the -/// subsequent vector operations are all data-parallel but **is generally -/// incorrect** in the presence of reductions or extract operations. -/// -/// More generally, clipping is a scalar abstraction that is expected to work -/// fine as a baseline for CPUs and GPUs but not for vector_load and DMAs. -/// To deal with real vector_load and DMAs, a "padded allocation + view" -/// abstraction with the ability to read out-of-memref-bounds (but still within -/// the allocated region) is necessary. -/// -/// Whether using scalar loops or vector_load/DMAs to perform the transfer, -/// junk values will be materialized in the vectors and generally need to be -/// filtered out and replaced by the "neutral element". This neutral element is -/// op-dependent so, in the future, we expect to create a vector filter and -/// apply it to a splatted constant vector with the proper neutral element at -/// each ssa-use. This filtering is not necessary for pure data-parallel -/// operations. -/// -/// In the case of vector_store/DMAs, Read-Modify-Write will be required, which -/// also have concurrency implications. Note that by using clipped scalar stores -/// in the presence of data-parallel only operations, we generate code that -/// writes the same value multiple time on the edge locations. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// padding. /// Performs the rewrite. template <> @@ -618,19 +589,11 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( /// 2. vector_store to local buffer (viewed as a memref<1 x vector>); /// 3. perfect loop nest over: /// a. scalar load from local buffers (viewed as a scalar memref); -/// a. scalar store to original memref (with clipping). +/// a. scalar store to original memref (if in bounds). /// 4. local memory deallocation. /// /// More specifically, lowers the data transfer part while ensuring no -/// out-of-bounds accesses are possible. Out-of-bounds behavior is handled by -/// clipping. This means that a given value in memory can be written to multiple -/// times and concurrently. -/// -/// See `Important notes about clipping and full-tiles only abstraction` in the -/// description of `readClipped` above. -/// -/// TODO: implement alternatives to clipping. -/// TODO: support non-data-parallel operations. +/// out-of-bounds accesses are possible. template <> LogicalResult VectorTransferRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { @@ -702,6 +665,8 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( return success(); } +namespace mlir { + void populateVectorToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *context, const VectorTransferToSCFOptions &options) { From 58970eb7d1ddd067e98f49fdcfb04373086245bc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 11:59:38 +0100 Subject: [PATCH 063/161] [OpenMP] Fix typo in CodeGenFunction::EmitOMPWorksharingLoop (PR46412) Fixes issue noticed by static analysis where we have a copy+paste typo, testing ScheduleKind.M1 twice instead of ScheduleKind.M2. 
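For illustration only (not part of this patch): the bug is the classic copy-and-paste pattern where a condition meant to look at two sibling fields reads the same field twice. A minimal, self-contained C++ sketch of the pattern, using invented enum and struct names rather than the real OpenMP schedule types:

  #include <cassert>

  enum class Modifier { Unknown, Monotonic, Nonmonotonic };

  struct Schedule {
    Modifier M1 = Modifier::Unknown;
    Modifier M2 = Modifier::Unknown;
  };

  // Buggy: the copy+paste typo tests M1 twice, so a nonmonotonic
  // modifier in the second slot is silently ignored.
  static bool anyNonmonotonicBuggy(const Schedule &S) {
    return S.M1 == Modifier::Nonmonotonic || S.M1 == Modifier::Nonmonotonic;
  }

  // Fixed: each modifier slot is tested exactly once.
  static bool anyNonmonotonicFixed(const Schedule &S) {
    return S.M1 == Modifier::Nonmonotonic || S.M2 == Modifier::Nonmonotonic;
  }

  int main() {
    Schedule S;
    S.M2 = Modifier::Nonmonotonic;
    assert(!anyNonmonotonicBuggy(S)); // misses the modifier on M2
    assert(anyNonmonotonicFixed(S));  // sees it once M2 is checked
    return 0;
  }

The change below applies the same fix (test M2 instead of repeating M1) to the real condition in EmitOMPWorksharingLoop.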
Differential Revision: https://reviews.llvm.org/D87250 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c1def6c88f0a66..b9260892bd2150 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2982,7 +2982,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || - ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; if ((RT.isStaticNonchunked(ScheduleKind.Schedule, From 847299d3f00507f172097bad9dde61dfad0d355b Mon Sep 17 00:00:00 2001 From: Ehsan Toosi Date: Thu, 20 Aug 2020 12:56:19 +0200 Subject: [PATCH 064/161] [mlir] remove BufferAssignmentPlacer from BufferAssignmentOpConversionPattern BufferPlacement has been removed, as allocations are no longer placed during the conversion. Differential Revision: https://reviews.llvm.org/D87079 --- .../include/mlir/Transforms/BufferPlacement.h | 52 +++---------------- .../Linalg/Transforms/TensorsToBuffers.cpp | 31 ++++------- mlir/lib/Transforms/BufferPlacement.cpp | 17 ------ .../lib/Transforms/TestBufferPlacement.cpp | 31 ++++------- 4 files changed, 28 insertions(+), 103 deletions(-) diff --git a/mlir/include/mlir/Transforms/BufferPlacement.h b/mlir/include/mlir/Transforms/BufferPlacement.h index b3db7794fd9712..6d88ac3599cf17 100644 --- a/mlir/include/mlir/Transforms/BufferPlacement.h +++ b/mlir/include/mlir/Transforms/BufferPlacement.h @@ -24,34 +24,6 @@ namespace mlir { -/// Prepares a buffer placement phase. It can place (user-defined) alloc -/// nodes. This simplifies the integration of the actual buffer-placement -/// pass. Sample usage: -/// BufferAssignmentPlacer baHelper(regionOp); -/// -> determine alloc positions -/// auto allocPosition = baHelper.computeAllocPosition(value); -/// -> place alloc -/// allocBuilder.setInsertionPoint(positions.getAllocPosition()); -/// -/// Note: this class is intended to be used during legalization. In order -/// to move alloc and dealloc nodes into the right places you can use the -/// createBufferPlacementPass() function. -class BufferAssignmentPlacer { -public: - /// Creates a new assignment builder. - explicit BufferAssignmentPlacer(Operation *op); - - /// Returns the operation this analysis was constructed from. - Operation *getOperation() const { return operation; } - - /// Computes the actual position to place allocs for the given result. - OpBuilder::InsertPoint computeAllocPosition(OpResult result); - -private: - /// The operation this analysis was constructed from. - Operation *operation; -}; - /// A helper type converter class for using inside Buffer Assignment operation /// conversion patterns. The default constructor keeps all the types intact /// except for the ranked-tensor types which is converted to memref types. @@ -157,31 +129,20 @@ class BufferAssignmentTypeConverter : public TypeConverter { SmallVector decomposeTypeConversions; }; -/// Helper conversion pattern that encapsulates a BufferAssignmentPlacer -/// instance. Sample usage: -/// class CustomConversionPattern : public -/// BufferAssignmentOpConversionPattern -/// { -/// ... matchAndRewrite(...) 
{ -/// -> Access stored BufferAssignmentPlacer -/// bufferAssignment->computeAllocPosition(resultOp); -/// } -/// }; +/// Helper conversion pattern that encapsulates a BufferAssignmentTypeConverter +/// instance. template class BufferAssignmentOpConversionPattern : public OpConversionPattern { public: explicit BufferAssignmentOpConversionPattern( - MLIRContext *context, BufferAssignmentPlacer *bufferAssignment = nullptr, - BufferAssignmentTypeConverter *converter = nullptr, + MLIRContext *context, BufferAssignmentTypeConverter *converter, PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit), - bufferAssignment(bufferAssignment), converter(converter) { + : OpConversionPattern(context, benefit), converter(converter) { assert(converter && "The type converter has not been defined"); } protected: - BufferAssignmentPlacer *bufferAssignment; BufferAssignmentTypeConverter *converter; }; @@ -282,8 +243,7 @@ class BufferAssignmentCallOpConverter template static void populateWithBufferAssignmentOpConversionPatterns( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { // clang-format off patterns->insert< @@ -291,7 +251,7 @@ static void populateWithBufferAssignmentOpConversionPatterns( BufferAssignmentFuncOpConverter, BufferAssignmentReturnOpConverter - >(context, placer, converter); + >(context, converter); // clang-format on } } // end namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp index 89a01f9ca6292f..6af0067c8928c3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/TensorsToBuffers.cpp @@ -51,11 +51,6 @@ class GenericOpConverter return rewriter.notifyMatchFailure( op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - - // Compute alloc position and insert a custom allocation node. - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter.create(loc, memrefType); newArgs.push_back(alloc); newResults.push_back(alloc); @@ -99,13 +94,12 @@ class GenericOpConverter /// Populate the given list with patterns to convert Linalg operations on /// tensors to buffers. static void populateConvertLinalgOnTensorsToBuffersPattern( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); - patterns->insert(context, placer, converter); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, + patterns); + patterns->insert(context, converter); } /// Converts Linalg operations that work on tensor-type operands or results to @@ -119,6 +113,8 @@ struct ConvertLinalgOnTensorsToBuffers // Mark all Standard operations legal. target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. 
auto isLegalOperation = [&](Operation *op) { @@ -144,16 +140,11 @@ struct ConvertLinalgOnTensorsToBuffers converter.setResultConversionKind( BufferAssignmentTypeConverter::AppendToArgumentsList); - // Walk over all the functions to apply buffer assignment. - getOperation().walk([&](FuncOp function) -> WalkResult { - OwningRewritePatternList patterns; - BufferAssignmentPlacer placer(function); - populateConvertLinalgOnTensorsToBuffersPattern(&context, &placer, - &converter, &patterns); - - // Applying full conversion - return applyFullConversion(function, target, patterns); - }); + OwningRewritePatternList patterns; + populateConvertLinalgOnTensorsToBuffersPattern(&context, &converter, + &patterns); + if (failed(applyFullConversion(this->getOperation(), target, patterns))) + this->signalPassFailure(); } }; } // end anonymous namespace diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 1ab3e7e2e48dcb..0279129758ab83 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -681,20 +681,6 @@ struct BufferPlacementPass : BufferPlacementBase { } // end anonymous namespace -//===----------------------------------------------------------------------===// -// BufferAssignmentPlacer -//===----------------------------------------------------------------------===// - -/// Creates a new assignment placer. -BufferAssignmentPlacer::BufferAssignmentPlacer(Operation *op) : operation(op) {} - -/// Computes the actual position to place allocs for the given value. -OpBuilder::InsertPoint -BufferAssignmentPlacer::computeAllocPosition(OpResult result) { - Operation *owner = result.getOwner(); - return OpBuilder::InsertPoint(owner->getBlock(), Block::iterator(owner)); -} - //===----------------------------------------------------------------------===// // BufferAssignmentTypeConverter //===----------------------------------------------------------------------===// @@ -891,9 +877,6 @@ LogicalResult BufferAssignmentCallOpConverter::matchAndRewrite( resultMapping.addMapping(newResultTypes.size() - 1); } else { // kind = BufferAssignmentTypeConverter::AppendToArgumentsList - OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result.value())); MemRefType memref = converted.dyn_cast(); if (!memref) return callOp.emitError("Cannot allocate for a non-Memref type"); diff --git a/mlir/test/lib/Transforms/TestBufferPlacement.cpp b/mlir/test/lib/Transforms/TestBufferPlacement.cpp index 14b72b9fc92a02..c338f0f37c4eae 100644 --- a/mlir/test/lib/Transforms/TestBufferPlacement.cpp +++ b/mlir/test/lib/Transforms/TestBufferPlacement.cpp @@ -65,11 +65,6 @@ struct TestBufferPlacementPreparationPass op, "dynamic shapes not currently supported"); auto memrefType = MemRefType::get(type.getShape(), type.getElementType()); - - // Compute alloc position and insert a custom allocation node. 
- OpBuilder::InsertionGuard guard(rewriter); - rewriter.restoreInsertionPoint( - bufferAssignment->computeAllocPosition(result)); auto alloc = rewriter.create(loc, memrefType); newArgs.push_back(alloc); newResults.push_back(alloc); @@ -110,13 +105,12 @@ struct TestBufferPlacementPreparationPass }; void populateTensorLinalgToBufferLinalgConversionPattern( - MLIRContext *context, BufferAssignmentPlacer *placer, - BufferAssignmentTypeConverter *converter, + MLIRContext *context, BufferAssignmentTypeConverter *converter, OwningRewritePatternList *patterns) { populateWithBufferAssignmentOpConversionPatterns< - mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, placer, - converter, patterns); - patterns->insert(context, placer, converter); + mlir::ReturnOp, mlir::ReturnOp, linalg::CopyOp>(context, converter, + patterns); + patterns->insert(context, converter); } void getDependentDialects(DialectRegistry ®istry) const override { @@ -133,6 +127,8 @@ struct TestBufferPlacementPreparationPass target.addLegalDialect(); target.addLegalOp(); target.addLegalOp(); + target.addLegalOp(); + target.addLegalOp(); // Mark all Linalg operations illegal as long as they work on tensors. auto isLegalOperation = [&](Operation *op) { @@ -191,16 +187,11 @@ struct TestBufferPlacementPreparationPass return success(); }); - // Walk over all the functions to apply buffer assignment. - this->getOperation().walk([&](FuncOp function) -> WalkResult { - OwningRewritePatternList patterns; - BufferAssignmentPlacer placer(function); - populateTensorLinalgToBufferLinalgConversionPattern( - &context, &placer, &converter, &patterns); - - // Applying full conversion - return applyFullConversion(function, target, patterns); - }); + OwningRewritePatternList patterns; + populateTensorLinalgToBufferLinalgConversionPattern(&context, &converter, + &patterns); + if (failed(applyFullConversion(this->getOperation(), target, patterns))) + this->signalPassFailure(); }; }; } // end anonymous namespace From 25c3fa3f13336b2da7c63162b0d9da164a0a96a1 Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Tue, 8 Sep 2020 19:55:14 +0800 Subject: [PATCH 065/161] [DWARFYAML] Make the debug_ranges section optional. This patch makes the debug_ranges section optional. When we specify an empty debug_ranges section, yaml2obj only emits the section header. 
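The important distinction is between a missing section (nothing is emitted) and a present-but-empty one (only the section header is emitted), which is why the field changes from a plain vector to an Optional of a vector. For illustration only, a minimal C++17 sketch of that representation, using std::optional in place of llvm::Optional and invented type names:

  #include <cassert>
  #include <optional>
  #include <vector>

  struct Ranges {}; // stand-in for one debug_ranges entry list

  struct DebugSections {
    // Absent  -> do not emit a debug_ranges section at all.
    // Present -> emit the section, even if the vector is empty.
    std::optional<std::vector<Ranges>> DebugRanges;
  };

  static bool shouldEmitDebugRanges(const DebugSections &D) {
    return D.DebugRanges.has_value();
  }

  int main() {
    DebugSections Missing;
    assert(!shouldEmitDebugRanges(Missing));

    DebugSections Empty;
    Empty.DebugRanges.emplace(); // corresponds to "debug_ranges: []" in the YAML input
    assert(shouldEmitDebugRanges(Empty) && Empty.DebugRanges->empty());
    return 0;
  }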
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87263 --- llvm/include/llvm/ObjectYAML/DWARFYAML.h | 2 +- llvm/lib/ObjectYAML/DWARFEmitter.cpp | 2 +- llvm/lib/ObjectYAML/DWARFYAML.cpp | 5 +-- .../ObjectYAML/MachO/DWARF-debug_ranges.yaml | 45 +++++++++++++++++++ .../yaml2obj/ELF/DWARF/debug-ranges.yaml | 14 ++++++ llvm/tools/obj2yaml/dwarf2yaml.cpp | 5 ++- 6 files changed, 67 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/llvm/include/llvm/ObjectYAML/DWARFYAML.h index 99a7af87d2c78d..3e5be41b8fa3bb 100644 --- a/llvm/include/llvm/ObjectYAML/DWARFYAML.h +++ b/llvm/include/llvm/ObjectYAML/DWARFYAML.h @@ -214,7 +214,7 @@ struct Data { Optional> DebugStrings; Optional> DebugStrOffsets; Optional> DebugAranges; - std::vector DebugRanges; + Optional> DebugRanges; Optional> DebugAddr; Optional PubNames; Optional PubTypes; diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp index bf29f40579ceb2..b634f7c123e8d5 100644 --- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp +++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp @@ -190,7 +190,7 @@ Error DWARFYAML::emitDebugAranges(raw_ostream &OS, const DWARFYAML::Data &DI) { Error DWARFYAML::emitDebugRanges(raw_ostream &OS, const DWARFYAML::Data &DI) { const size_t RangesOffset = OS.tell(); uint64_t EntryIndex = 0; - for (auto DebugRanges : DI.DebugRanges) { + for (auto DebugRanges : *DI.DebugRanges) { const size_t CurrOffset = OS.tell() - RangesOffset; if (DebugRanges.Offset && (uint64_t)*DebugRanges.Offset < CurrOffset) return createStringError(errc::invalid_argument, diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp index 353e5058a0e5df..975b9b40b6b188 100644 --- a/llvm/lib/ObjectYAML/DWARFYAML.cpp +++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp @@ -28,7 +28,7 @@ SetVector DWARFYAML::Data::getNonEmptySectionNames() const { SecNames.insert("debug_str"); if (DebugAranges) SecNames.insert("debug_aranges"); - if (!DebugRanges.empty()) + if (DebugRanges) SecNames.insert("debug_ranges"); if (!DebugLines.empty()) SecNames.insert("debug_line"); @@ -95,8 +95,7 @@ void MappingTraits::mapping(IO &IO, DWARFYAML::Data &DWARF) { IO.mapOptional("debug_str", DWARF.DebugStrings); IO.mapOptional("debug_abbrev", DWARF.DebugAbbrev); IO.mapOptional("debug_aranges", DWARF.DebugAranges); - if (!DWARF.DebugRanges.empty() || !IO.outputting()) - IO.mapOptional("debug_ranges", DWARF.DebugRanges); + IO.mapOptional("debug_ranges", DWARF.DebugRanges); IO.mapOptional("debug_pubnames", DWARF.PubNames); IO.mapOptional("debug_pubtypes", DWARF.PubTypes); DWARFCtx.IsGNUPubSec = true; diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index 8948bf92b7d76a..30997ba1144b62 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -239,3 +239,48 @@ DWARF: - AbbrCode: 0x00000000 Values: [] ... + +## Test generating and dumping an empty __debug_ranges section. + +# RUN: yaml2obj --docnum=2 %s | obj2yaml | FileCheck %s --check-prefix=EMPTY + +# EMPTY: DWARF: +# EMPTY-NEXT: debug_ranges: [] +# EMPTY-NEXT: ... 
+ +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x0000000A + ncmds: 1 + sizeofcmds: 232 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DWARF + vmaddr: 0x00 + vmsize: 0x00 + fileoff: 0x00 + filesize: 0x00 + maxprot: 0 + initprot: 0 + nsects: 1 + flags: 0 + Sections: + - sectname: __debug_ranges + segname: __DWARF + addr: 0x00 + size: [[SIZE=0]] + offset: 0x210 + align: 0 + reloff: 0x00000000 + nreloc: 0 + flags: 0x00000000 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: [[CONTENT=]] diff --git a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml index 6a9cd7a6195e7f..f80dd6de53689c 100644 --- a/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml +++ b/llvm/test/tools/yaml2obj/ELF/DWARF/debug-ranges.yaml @@ -407,3 +407,17 @@ DWARF: Entries: - LowOffset: 0x1234 HighOffset: 0x5678 + +## l) Test that the .debug_ranges section header is emitted if the "debug_ranges" +## entry is empty. + +# RUN: yaml2obj --docnum=12 %s -o %t12.o +# RUN: llvm-readobj -S %t12.o | FileCheck -DSIZE=0 -DADDRALIGN=1 %s --check-prefix=DWARF-HEADER + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC +DWARF: + debug_ranges: [] diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp index cef7b699805c86..1dcf6d42d6ada7 100644 --- a/llvm/tools/obj2yaml/dwarf2yaml.cpp +++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp @@ -114,6 +114,7 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { DCtx.isLittleEndian(), AddrSize); uint64_t Offset = 0; DWARFDebugRangeList DwarfRanges; + std::vector DebugRanges; while (Data.isValidOffset(Offset)) { DWARFYAML::Ranges YamlRanges; @@ -123,8 +124,10 @@ Error dumpDebugRanges(DWARFContext &DCtx, DWARFYAML::Data &Y) { return E; for (const auto &RLE : DwarfRanges.getEntries()) YamlRanges.Entries.push_back({RLE.StartAddress, RLE.EndAddress}); - Y.DebugRanges.push_back(std::move(YamlRanges)); + DebugRanges.push_back(std::move(YamlRanges)); } + + Y.DebugRanges = DebugRanges; return ErrorSuccess(); } From 0729ae367af07c2c75d08cfa881795b325fcf922 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 12:45:08 +0100 Subject: [PATCH 066/161] X86DomainReassignment.cpp - improve auto const/pointer/reference qualifiers. NFCI. Fix clang-tidy warnings by ensuring auto variables are more cleanly qualified, or just avoid auto entirely. --- llvm/lib/Target/X86/X86DomainReassignment.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp index 488ee51f1d89bb..3a0d6a52ef4636 100644 --- a/llvm/lib/Target/X86/X86DomainReassignment.cpp +++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp @@ -141,7 +141,7 @@ class InstrReplacer : public InstrConverterBase { return false; // It's illegal to replace an instruction that implicitly defines a register // with an instruction that doesn't, unless that register dead. 
- for (auto &MO : MI->implicit_operands()) + for (const auto &MO : MI->implicit_operands()) if (MO.isReg() && MO.isDef() && !MO.isDead() && !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) return false; @@ -180,7 +180,7 @@ class InstrReplacerDstCOPY : public InstrConverterBase { MachineRegisterInfo *MRI) const override { assert(isLegal(MI, TII) && "Cannot convert instruction"); MachineBasicBlock *MBB = MI->getParent(); - auto &DL = MI->getDebugLoc(); + const DebugLoc &DL = MI->getDebugLoc(); Register Reg = MRI->createVirtualRegister( TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), @@ -237,7 +237,7 @@ class InstrCOPYReplacer : public InstrReplacer { MachineRegisterInfo *MRI) const override { assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); - for (auto &MO : MI->operands()) { + for (const auto &MO : MI->operands()) { // Physical registers will not be converted. Assume that converting the // COPY to the destination domain will eventually result in a actual // instruction. @@ -517,7 +517,7 @@ void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const { } } - for (auto MI : ToErase) + for (auto *MI : ToErase) MI->eraseFromParent(); } @@ -537,7 +537,7 @@ static bool usedAsAddr(const MachineInstr &MI, unsigned Reg, for (unsigned MemOpIdx = MemOpStart, MemOpEnd = MemOpStart + X86::AddrNumOperands; MemOpIdx < MemOpEnd; ++MemOpIdx) { - auto &Op = MI.getOperand(MemOpIdx); + const MachineOperand &Op = MI.getOperand(MemOpIdx); if (Op.isReg() && Op.getReg() == Reg) return true; } From fcff2c32c0f3a85f7fce02a120de3f1b5778252c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 12:46:00 +0100 Subject: [PATCH 067/161] X86CallLowering.cpp - improve auto const/pointer/reference qualifiers. NFCI. Fix clang-tidy warnings by ensuring auto variables are more cleanly qualified, or just avoid auto entirely. 
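For illustration only: the clang-tidy warnings involved are of the qualified-auto variety (e.g. readability-qualified-auto), which flag auto variables that actually deduce to pointers, or deduced references whose constness is not spelled out. A small self-contained sketch of the before/after pattern, with invented types rather than the real MachineOperand/MachineMemOperand classes:

  #include <vector>

  struct Operand { int Reg; };

  static int sumRegs(const std::vector<Operand *> &Ops) {
    int Sum = 0;
    // Before: "for (auto &Op : Ops)" hides that each element is a pointer
    // and that the loop never modifies it.
    // After: qualify the deduced type so the pointer and the const are
    // visible at the use site.
    for (const auto *Op : Ops)
      Sum += Op->Reg;
    return Sum;
  }

  int main() {
    Operand A{1}, B{2};
    std::vector<Operand *> Ops = {&A, &B};
    return sumRegs(Ops) == 3 ? 0 : 1;
  }

Where the deduced type is short and informative (for example const DebugLoc & or const DataLayout &), the patch simply spells the type out instead of using auto.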
--- llvm/lib/Target/X86/X86CallLowering.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp index 0286482ac9af8e..8342cad45dfd0f 100644 --- a/llvm/lib/Target/X86/X86CallLowering.cpp +++ b/llvm/lib/Target/X86/X86CallLowering.cpp @@ -148,9 +148,9 @@ struct X86OutgoingValueHandler : public CallLowering::IncomingValueHandler { MachineFunction &MF = MIRBuilder.getMF(); Register ExtReg = extendRegister(ValVReg, VA); - auto MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, - VA.getLocVT().getStoreSize(), - inferAlignFromPtrInfo(MF, MPO)); + auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, + VA.getLocVT().getStoreSize(), + inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildStore(ExtReg, Addr, *MMO); } @@ -194,7 +194,7 @@ bool X86CallLowering::lowerReturn( MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - auto &DL = MF.getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); LLVMContext &Ctx = Val->getType()->getContext(); const X86TargetLowering &TLI = *getTLI(); @@ -245,7 +245,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler { void assignValueToAddress(Register ValVReg, Register Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { MachineFunction &MF = MIRBuilder.getMF(); - auto MMO = MF.getMachineMemOperand( + auto *MMO = MF.getMachineMemOperand( MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, inferAlignFromPtrInfo(MF, MPO)); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); @@ -337,8 +337,7 @@ bool X86CallLowering::lowerFormalArguments( SmallVector SplitArgs; unsigned Idx = 0; - for (auto &Arg : F.args()) { - + for (const auto &Arg : F.args()) { // TODO: handle not simple cases. if (Arg.hasAttribute(Attribute::ByVal) || Arg.hasAttribute(Attribute::InReg) || @@ -377,10 +376,10 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - auto &DL = F.getParent()->getDataLayout(); + const DataLayout &DL = F.getParent()->getDataLayout(); const X86Subtarget &STI = MF.getSubtarget(); const TargetInstrInfo &TII = *STI.getInstrInfo(); - auto TRI = STI.getRegisterInfo(); + const X86RegisterInfo *TRI = STI.getRegisterInfo(); // Handle only Linux C, X86_64_SysV calling conventions for now. if (!STI.isTargetLinux() || !(Info.CallConv == CallingConv::C || From ae85da86ad8fbd022129650d0b2a6b615709a790 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 13:01:09 +0100 Subject: [PATCH 068/161] [Codegen][X86] Begin moving X86 specific codegen tests into X86 subfolder. Discussed with @craig.topper and @spatel - this is to try and tidyup the codegen folder and move the x86 specific tests (as opposed to general tests that just happen to use x86 triples) into subfolders. Its up to other targets if they follow suit. It also helps speed up test iterations as using wildcards on lit commands often misses some filenames. 
--- clang/test/CodeGen/{ => X86}/x86-64-inline-asm.c | 0 clang/test/CodeGen/{ => X86}/x86-GCC-inline-asm-Y-constraints.c | 0 clang/test/CodeGen/{ => X86}/x86-atomic-long_double.c | 0 clang/test/CodeGen/{ => X86}/x86-bswap.c | 0 clang/test/CodeGen/{ => X86}/x86-builtins-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-cf-protection.c | 0 clang/test/CodeGen/{ => X86}/x86-crc-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-enqcmd-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-inline-asm-min-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86-inline-asm-v-constraint.c | 0 clang/test/CodeGen/{ => X86}/x86-long-double.cpp | 0 clang/test/CodeGen/{ => X86}/x86-nontemporal.c | 0 clang/test/CodeGen/{ => X86}/x86-serialize-intrin.c | 0 clang/test/CodeGen/{ => X86}/x86-soft-float.c | 0 clang/test/CodeGen/{ => X86}/x86-tsxldtrk-builtins.c | 0 clang/test/CodeGen/{ => X86}/x86-vec-i128.c | 0 clang/test/CodeGen/{ => X86}/x86-vec-struct-packing.c | 0 clang/test/CodeGen/{ => X86}/x86-vector-width.c | 0 clang/test/CodeGen/{ => X86}/x86.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-darwin.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-iamcu.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-linux.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-nommx.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-realign.c | 0 clang/test/CodeGen/{ => X86}/x86_32-arguments-win32.c | 0 clang/test/CodeGen/{ => X86}/x86_32-fpcc-struct-return.c | 0 clang/test/CodeGen/{ => X86}/x86_32-inline-asm.c | 0 clang/test/CodeGen/{ => X86}/x86_32-xsave.c | 0 clang/test/CodeGen/{ => X86}/x86_64-PR42672.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-darwin.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-nacl.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments-win32.c | 0 clang/test/CodeGen/{ => X86}/x86_64-arguments.c | 0 clang/test/CodeGen/{ => X86}/x86_64-atomic-128.c | 0 clang/test/CodeGen/{ => X86}/x86_64-floatvectors.c | 0 clang/test/CodeGen/{ => X86}/x86_64-instrument-functions.c | 0 clang/test/CodeGen/{ => X86}/x86_64-longdouble.c | 0 clang/test/CodeGen/{ => X86}/x86_64-mno-sse.c | 0 clang/test/CodeGen/{ => X86}/x86_64-mno-sse2.c | 0 clang/test/CodeGen/{ => X86}/x86_64-profiling-keep-fp.c | 0 clang/test/CodeGen/{ => X86}/x86_64-xsave.c | 0 clang/test/CodeGen/{ => X86}/x86_inlineasm_curly_bracket_escape.c | 0 43 files changed, 0 insertions(+), 0 deletions(-) rename clang/test/CodeGen/{ => X86}/x86-64-inline-asm.c (100%) rename clang/test/CodeGen/{ => X86}/x86-GCC-inline-asm-Y-constraints.c (100%) rename clang/test/CodeGen/{ => X86}/x86-atomic-long_double.c (100%) rename clang/test/CodeGen/{ => X86}/x86-bswap.c (100%) rename clang/test/CodeGen/{ => X86}/x86-builtins-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-cf-protection.c (100%) rename clang/test/CodeGen/{ => X86}/x86-crc-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-enqcmd-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-inline-asm-min-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86-inline-asm-v-constraint.c (100%) rename clang/test/CodeGen/{ => X86}/x86-long-double.cpp (100%) rename clang/test/CodeGen/{ => X86}/x86-nontemporal.c (100%) rename clang/test/CodeGen/{ => X86}/x86-serialize-intrin.c (100%) rename clang/test/CodeGen/{ => X86}/x86-soft-float.c (100%) rename clang/test/CodeGen/{ => X86}/x86-tsxldtrk-builtins.c (100%) rename clang/test/CodeGen/{ => X86}/x86-vec-i128.c (100%) rename 
clang/test/CodeGen/{ => X86}/x86-vec-struct-packing.c (100%) rename clang/test/CodeGen/{ => X86}/x86-vector-width.c (100%) rename clang/test/CodeGen/{ => X86}/x86.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-darwin.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-iamcu.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-linux.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-nommx.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-realign.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-arguments-win32.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-fpcc-struct-return.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-inline-asm.c (100%) rename clang/test/CodeGen/{ => X86}/x86_32-xsave.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-PR42672.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-darwin.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-nacl.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments-win32.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-arguments.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-atomic-128.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-floatvectors.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-instrument-functions.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-longdouble.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-mno-sse.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-mno-sse2.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-profiling-keep-fp.c (100%) rename clang/test/CodeGen/{ => X86}/x86_64-xsave.c (100%) rename clang/test/CodeGen/{ => X86}/x86_inlineasm_curly_bracket_escape.c (100%) diff --git a/clang/test/CodeGen/x86-64-inline-asm.c b/clang/test/CodeGen/X86/x86-64-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86-64-inline-asm.c rename to clang/test/CodeGen/X86/x86-64-inline-asm.c diff --git a/clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c b/clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c similarity index 100% rename from clang/test/CodeGen/x86-GCC-inline-asm-Y-constraints.c rename to clang/test/CodeGen/X86/x86-GCC-inline-asm-Y-constraints.c diff --git a/clang/test/CodeGen/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c similarity index 100% rename from clang/test/CodeGen/x86-atomic-long_double.c rename to clang/test/CodeGen/X86/x86-atomic-long_double.c diff --git a/clang/test/CodeGen/x86-bswap.c b/clang/test/CodeGen/X86/x86-bswap.c similarity index 100% rename from clang/test/CodeGen/x86-bswap.c rename to clang/test/CodeGen/X86/x86-bswap.c diff --git a/clang/test/CodeGen/x86-builtins-vector-width.c b/clang/test/CodeGen/X86/x86-builtins-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-builtins-vector-width.c rename to clang/test/CodeGen/X86/x86-builtins-vector-width.c diff --git a/clang/test/CodeGen/x86-builtins.c b/clang/test/CodeGen/X86/x86-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-builtins.c rename to clang/test/CodeGen/X86/x86-builtins.c diff --git a/clang/test/CodeGen/x86-cf-protection.c b/clang/test/CodeGen/X86/x86-cf-protection.c similarity index 100% rename from clang/test/CodeGen/x86-cf-protection.c rename to clang/test/CodeGen/X86/x86-cf-protection.c diff --git a/clang/test/CodeGen/x86-crc-builtins.c b/clang/test/CodeGen/X86/x86-crc-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-crc-builtins.c rename to clang/test/CodeGen/X86/x86-crc-builtins.c diff --git 
a/clang/test/CodeGen/x86-enqcmd-builtins.c b/clang/test/CodeGen/X86/x86-enqcmd-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-enqcmd-builtins.c rename to clang/test/CodeGen/X86/x86-enqcmd-builtins.c diff --git a/clang/test/CodeGen/x86-inline-asm-min-vector-width.c b/clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-min-vector-width.c rename to clang/test/CodeGen/X86/x86-inline-asm-min-vector-width.c diff --git a/clang/test/CodeGen/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c similarity index 100% rename from clang/test/CodeGen/x86-inline-asm-v-constraint.c rename to clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c diff --git a/clang/test/CodeGen/x86-long-double.cpp b/clang/test/CodeGen/X86/x86-long-double.cpp similarity index 100% rename from clang/test/CodeGen/x86-long-double.cpp rename to clang/test/CodeGen/X86/x86-long-double.cpp diff --git a/clang/test/CodeGen/x86-nontemporal.c b/clang/test/CodeGen/X86/x86-nontemporal.c similarity index 100% rename from clang/test/CodeGen/x86-nontemporal.c rename to clang/test/CodeGen/X86/x86-nontemporal.c diff --git a/clang/test/CodeGen/x86-serialize-intrin.c b/clang/test/CodeGen/X86/x86-serialize-intrin.c similarity index 100% rename from clang/test/CodeGen/x86-serialize-intrin.c rename to clang/test/CodeGen/X86/x86-serialize-intrin.c diff --git a/clang/test/CodeGen/x86-soft-float.c b/clang/test/CodeGen/X86/x86-soft-float.c similarity index 100% rename from clang/test/CodeGen/x86-soft-float.c rename to clang/test/CodeGen/X86/x86-soft-float.c diff --git a/clang/test/CodeGen/x86-tsxldtrk-builtins.c b/clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c similarity index 100% rename from clang/test/CodeGen/x86-tsxldtrk-builtins.c rename to clang/test/CodeGen/X86/x86-tsxldtrk-builtins.c diff --git a/clang/test/CodeGen/x86-vec-i128.c b/clang/test/CodeGen/X86/x86-vec-i128.c similarity index 100% rename from clang/test/CodeGen/x86-vec-i128.c rename to clang/test/CodeGen/X86/x86-vec-i128.c diff --git a/clang/test/CodeGen/x86-vec-struct-packing.c b/clang/test/CodeGen/X86/x86-vec-struct-packing.c similarity index 100% rename from clang/test/CodeGen/x86-vec-struct-packing.c rename to clang/test/CodeGen/X86/x86-vec-struct-packing.c diff --git a/clang/test/CodeGen/x86-vector-width.c b/clang/test/CodeGen/X86/x86-vector-width.c similarity index 100% rename from clang/test/CodeGen/x86-vector-width.c rename to clang/test/CodeGen/X86/x86-vector-width.c diff --git a/clang/test/CodeGen/x86.c b/clang/test/CodeGen/X86/x86.c similarity index 100% rename from clang/test/CodeGen/x86.c rename to clang/test/CodeGen/X86/x86.c diff --git a/clang/test/CodeGen/x86_32-arguments-darwin.c b/clang/test/CodeGen/X86/x86_32-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_32-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_32-arguments-iamcu.c b/clang/test/CodeGen/X86/x86_32-arguments-iamcu.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-iamcu.c rename to clang/test/CodeGen/X86/x86_32-arguments-iamcu.c diff --git a/clang/test/CodeGen/x86_32-arguments-linux.c b/clang/test/CodeGen/X86/x86_32-arguments-linux.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-linux.c rename to clang/test/CodeGen/X86/x86_32-arguments-linux.c diff --git a/clang/test/CodeGen/x86_32-arguments-nommx.c 
b/clang/test/CodeGen/X86/x86_32-arguments-nommx.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-nommx.c rename to clang/test/CodeGen/X86/x86_32-arguments-nommx.c diff --git a/clang/test/CodeGen/x86_32-arguments-realign.c b/clang/test/CodeGen/X86/x86_32-arguments-realign.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-realign.c rename to clang/test/CodeGen/X86/x86_32-arguments-realign.c diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/X86/x86_32-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_32-arguments-win32.c rename to clang/test/CodeGen/X86/x86_32-arguments-win32.c diff --git a/clang/test/CodeGen/x86_32-fpcc-struct-return.c b/clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c similarity index 100% rename from clang/test/CodeGen/x86_32-fpcc-struct-return.c rename to clang/test/CodeGen/X86/x86_32-fpcc-struct-return.c diff --git a/clang/test/CodeGen/x86_32-inline-asm.c b/clang/test/CodeGen/X86/x86_32-inline-asm.c similarity index 100% rename from clang/test/CodeGen/x86_32-inline-asm.c rename to clang/test/CodeGen/X86/x86_32-inline-asm.c diff --git a/clang/test/CodeGen/x86_32-xsave.c b/clang/test/CodeGen/X86/x86_32-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_32-xsave.c rename to clang/test/CodeGen/X86/x86_32-xsave.c diff --git a/clang/test/CodeGen/x86_64-PR42672.c b/clang/test/CodeGen/X86/x86_64-PR42672.c similarity index 100% rename from clang/test/CodeGen/x86_64-PR42672.c rename to clang/test/CodeGen/X86/x86_64-PR42672.c diff --git a/clang/test/CodeGen/x86_64-arguments-darwin.c b/clang/test/CodeGen/X86/x86_64-arguments-darwin.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-darwin.c rename to clang/test/CodeGen/X86/x86_64-arguments-darwin.c diff --git a/clang/test/CodeGen/x86_64-arguments-nacl.c b/clang/test/CodeGen/X86/x86_64-arguments-nacl.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-nacl.c rename to clang/test/CodeGen/X86/x86_64-arguments-nacl.c diff --git a/clang/test/CodeGen/x86_64-arguments-win32.c b/clang/test/CodeGen/X86/x86_64-arguments-win32.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments-win32.c rename to clang/test/CodeGen/X86/x86_64-arguments-win32.c diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c similarity index 100% rename from clang/test/CodeGen/x86_64-arguments.c rename to clang/test/CodeGen/X86/x86_64-arguments.c diff --git a/clang/test/CodeGen/x86_64-atomic-128.c b/clang/test/CodeGen/X86/x86_64-atomic-128.c similarity index 100% rename from clang/test/CodeGen/x86_64-atomic-128.c rename to clang/test/CodeGen/X86/x86_64-atomic-128.c diff --git a/clang/test/CodeGen/x86_64-floatvectors.c b/clang/test/CodeGen/X86/x86_64-floatvectors.c similarity index 100% rename from clang/test/CodeGen/x86_64-floatvectors.c rename to clang/test/CodeGen/X86/x86_64-floatvectors.c diff --git a/clang/test/CodeGen/x86_64-instrument-functions.c b/clang/test/CodeGen/X86/x86_64-instrument-functions.c similarity index 100% rename from clang/test/CodeGen/x86_64-instrument-functions.c rename to clang/test/CodeGen/X86/x86_64-instrument-functions.c diff --git a/clang/test/CodeGen/x86_64-longdouble.c b/clang/test/CodeGen/X86/x86_64-longdouble.c similarity index 100% rename from clang/test/CodeGen/x86_64-longdouble.c rename to clang/test/CodeGen/X86/x86_64-longdouble.c diff --git a/clang/test/CodeGen/x86_64-mno-sse.c b/clang/test/CodeGen/X86/x86_64-mno-sse.c 
similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse.c rename to clang/test/CodeGen/X86/x86_64-mno-sse.c diff --git a/clang/test/CodeGen/x86_64-mno-sse2.c b/clang/test/CodeGen/X86/x86_64-mno-sse2.c similarity index 100% rename from clang/test/CodeGen/x86_64-mno-sse2.c rename to clang/test/CodeGen/X86/x86_64-mno-sse2.c diff --git a/clang/test/CodeGen/x86_64-profiling-keep-fp.c b/clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c similarity index 100% rename from clang/test/CodeGen/x86_64-profiling-keep-fp.c rename to clang/test/CodeGen/X86/x86_64-profiling-keep-fp.c diff --git a/clang/test/CodeGen/x86_64-xsave.c b/clang/test/CodeGen/X86/x86_64-xsave.c similarity index 100% rename from clang/test/CodeGen/x86_64-xsave.c rename to clang/test/CodeGen/X86/x86_64-xsave.c diff --git a/clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c b/clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c similarity index 100% rename from clang/test/CodeGen/x86_inlineasm_curly_bracket_escape.c rename to clang/test/CodeGen/X86/x86_inlineasm_curly_bracket_escape.c From df63eedef64d715ce1f31843f7de9c11fe1e597f Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 14:02:46 +0200 Subject: [PATCH 069/161] [mlir][VectorOps] Put back anonymous namespace to work around GCC5 bug. VectorToSCF.cpp:241:61: error: specialization of 'template mlir::LogicalResult {anonymous}::NDTransferOpHelper::doReplace()' in different namespace [-fpermissive] --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0eb46f7ba3cfb6..0a74472a49f6e2 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -131,7 +131,6 @@ class NDTransferOpHelper { VectorType minorVectorType; // vector<(minor_dims) x type> MemRefType memRefMinorVectorType; // memref> }; -} // namespace template void NDTransferOpHelper::emitLoops( @@ -395,6 +394,8 @@ LogicalResult NDTransferOpHelper::doReplace() { return success(); } +} // namespace + /// Analyzes the `transfer` to find an access dimension along the fastest remote /// MemRef dimension. If such a dimension with coalescing properties is found, /// `pivs` and `vectorBoundsCapture` are swapped so that the invocation of From 4e9f4d0b9d1dbf2c1d3e389b870a16c3dbd5c302 Mon Sep 17 00:00:00 2001 From: Ehsan Toosi Date: Mon, 24 Aug 2020 13:19:50 +0200 Subject: [PATCH 070/161] [mlir] Fix bug in copy removal A crash could happen due to copy removal. The bug is fixed and two more test cases are added. Differential Revision: https://reviews.llvm.org/D87128 --- mlir/lib/Transforms/CopyRemoval.cpp | 37 +++++++++++---- mlir/test/Transforms/copy-removal.mlir | 64 ++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Transforms/CopyRemoval.cpp b/mlir/lib/Transforms/CopyRemoval.cpp index ccfd02630ac288..c5a8da63295681 100644 --- a/mlir/lib/Transforms/CopyRemoval.cpp +++ b/mlir/lib/Transforms/CopyRemoval.cpp @@ -30,16 +30,35 @@ class CopyRemovalPass : public PassWrapper> { reuseCopySourceAsTarget(copyOp); reuseCopyTargetAsSource(copyOp); }); + for (std::pair &pair : replaceList) + pair.first.replaceAllUsesWith(pair.second); for (Operation *op : eraseList) op->erase(); } private: /// List of operations that need to be removed. 
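  /// Note on the fix (inferred from the hunks below, not spelled out in the
  /// commit message): instead of calling replaceAllUsesWith while still
  /// walking the CopyOps, the pass now records the pending rewrites in
  /// `replaceList` and applies them together with the erasures after the walk
  /// in runOnOperation, and it only treats an op as the matching allocation
  /// when getAllocationOpInBlock finds a real allocation in the same block.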
- DenseSet eraseList; + llvm::SmallPtrSet eraseList; + + /// List of values that need to be replaced with their counterparts. + llvm::SmallDenseSet, 4> replaceList; + + /// Returns the allocation operation for `value` in `block` if it exists. + /// nullptr otherwise. + Operation *getAllocationOpInBlock(Value value, Block *block) { + assert(block && "Block cannot be null"); + Operation *op = value.getDefiningOp(); + if (op && op->getBlock() == block) { + auto effects = dyn_cast(op); + if (effects && effects.hasEffect()) + return op; + } + return nullptr; + } /// Returns the deallocation operation for `value` in `block` if it exists. - Operation *getDeallocationInBlock(Value value, Block *block) { + /// nullptr otherwise. + Operation *getDeallocationOpInBlock(Value value, Block *block) { assert(block && "Block cannot be null"); auto valueUsers = value.getUsers(); auto it = llvm::find_if(valueUsers, [&](Operation *op) { @@ -119,9 +138,10 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); + Block *copyBlock = copy->getBlock(); Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); - Operation *toDefiningOp = to.getDefiningOp(); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); + Operation *toDefiningOp = getAllocationOpInBlock(to, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !toDefiningOp || !areOpsInTheSameBlock({fromFreeingOp, toDefiningOp, copy}) || hasUsersBetween(to, toDefiningOp, copy) || @@ -129,7 +149,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - to.replaceAllUsesWith(from); + replaceList.insert({to, from}); eraseList.insert(copy); eraseList.insert(toDefiningOp); eraseList.insert(fromFreeingOp); @@ -169,8 +189,9 @@ class CopyRemovalPass : public PassWrapper> { Value to = copyOp.getTarget(); Operation *copy = copyOp.getOperation(); - Operation *fromDefiningOp = from.getDefiningOp(); - Operation *fromFreeingOp = getDeallocationInBlock(from, copy->getBlock()); + Block *copyBlock = copy->getBlock(); + Operation *fromDefiningOp = getAllocationOpInBlock(from, copyBlock); + Operation *fromFreeingOp = getDeallocationOpInBlock(from, copyBlock); if (!fromDefiningOp || !fromFreeingOp || !areOpsInTheSameBlock({fromFreeingOp, fromDefiningOp, copy}) || hasUsersBetween(to, fromDefiningOp, copy) || @@ -178,7 +199,7 @@ class CopyRemovalPass : public PassWrapper> { hasMemoryEffectOpBetween(copy, fromFreeingOp)) return; - from.replaceAllUsesWith(to); + replaceList.insert({from, to}); eraseList.insert(copy); eraseList.insert(fromDefiningOp); eraseList.insert(fromFreeingOp); diff --git a/mlir/test/Transforms/copy-removal.mlir b/mlir/test/Transforms/copy-removal.mlir index f750dabb18a048..a0d1193b77d58d 100644 --- a/mlir/test/Transforms/copy-removal.mlir +++ b/mlir/test/Transforms/copy-removal.mlir @@ -283,3 +283,67 @@ func @test_ReuseCopyTargetAsSource(%arg0: memref<2xf32>){ dealloc %temp : memref<2xf32> return } + +// ----- + +// The only redundant copy is linalg.copy(%4, %5) + +// CHECK-LABEL: func @loop_alloc +func @loop_alloc(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<2xf32>, %arg4: memref<2xf32>) { + // CHECK: %{{.*}} = alloc() + %0 = alloc() : memref<2xf32> + dealloc %0 : memref<2xf32> + // CHECK: %{{.*}} = alloc() + %1 = alloc() : memref<2xf32> + // CHECK: linalg.copy + linalg.copy(%arg3, %1) : memref<2xf32>, memref<2xf32> + %2 = scf.for %arg5 = %arg0 to 
%arg1 step %arg2 iter_args(%arg6 = %1) -> (memref<2xf32>) { + %3 = cmpi "eq", %arg5, %arg1 : index + // CHECK: dealloc + dealloc %arg6 : memref<2xf32> + // CHECK: %[[PERCENT4:.*]] = alloc() + %4 = alloc() : memref<2xf32> + // CHECK-NOT: alloc + // CHECK-NOT: linalg.copy + // CHECK-NOT: dealloc + %5 = alloc() : memref<2xf32> + linalg.copy(%4, %5) : memref<2xf32>, memref<2xf32> + dealloc %4 : memref<2xf32> + // CHECK: %[[PERCENT6:.*]] = alloc() + %6 = alloc() : memref<2xf32> + // CHECK: linalg.copy(%[[PERCENT4]], %[[PERCENT6]]) + linalg.copy(%5, %6) : memref<2xf32>, memref<2xf32> + scf.yield %6 : memref<2xf32> + } + // CHECK: linalg.copy + linalg.copy(%2, %arg4) : memref<2xf32>, memref<2xf32> + dealloc %2 : memref<2xf32> + return +} + +// ----- + +// The linalg.copy operation can be removed in addition to alloc and dealloc +// operations. All uses of %0 is then replaced with %arg2. + +// CHECK-LABEL: func @check_with_affine_dialect +func @check_with_affine_dialect(%arg0: memref<4xf32>, %arg1: memref<4xf32>, %arg2: memref<4xf32>) { + // CHECK-SAME: (%[[ARG0:.*]]: memref<4xf32>, %[[ARG1:.*]]: memref<4xf32>, %[[RES:.*]]: memref<4xf32>) + // CHECK-NOT: alloc + %0 = alloc() : memref<4xf32> + affine.for %arg3 = 0 to 4 { + %5 = affine.load %arg0[%arg3] : memref<4xf32> + %6 = affine.load %arg1[%arg3] : memref<4xf32> + %7 = cmpf "ogt", %5, %6 : f32 + // CHECK: %[[SELECT_RES:.*]] = select + %8 = select %7, %5, %6 : f32 + // CHECK-NEXT: affine.store %[[SELECT_RES]], %[[RES]] + affine.store %8, %0[%arg3] : memref<4xf32> + } + // CHECK-NOT: linalg.copy + // CHECK-NOT: dealloc + "linalg.copy"(%0, %arg2) : (memref<4xf32>, memref<4xf32>) -> () + dealloc %0 : memref<4xf32> + //CHECK: return + return +} From 86bd8f82cc74725a08a40efe176d3d6b9c9cef92 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Sat, 5 Sep 2020 17:52:23 +0300 Subject: [PATCH 071/161] [CMake] Remove dead FindPythonInterp code LLVM has bumped the minimum required CMake version to 3.13.4, so this has become dead code. Reviewed By: #libc, ldionne Differential Revision: https://reviews.llvm.org/D87189 --- clang/CMakeLists.txt | 37 +++++++++--------------------------- compiler-rt/CMakeLists.txt | 33 +++++++++----------------------- libcxx/CMakeLists.txt | 36 +++++++++++------------------------ lld/CMakeLists.txt | 39 ++++++++++---------------------------- llvm/CMakeLists.txt | 37 +++++++++--------------------------- 5 files changed, 48 insertions(+), 134 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 5ac0e6b6ef0cb1..f015951c7ec727 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -136,38 +136,19 @@ if( CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) if(LLVM_INCLUDE_TESTS) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR - "Unable to find Python interpreter, required for builds and testing. 
- - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Check prebuilt llvm/utils. diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 0a0294f937dbab..9967e293749bd8 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -81,34 +81,19 @@ if (COMPILER_RT_STANDALONE_BUILD) set_target_properties(intrinsics_gen PROPERTIES FOLDER "Compiler-RT Misc") endif() - if(CMAKE_VERSION VERSION_LESS 3.12) - # Find Python interpreter. - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR " - Unable to find Python interpreter required testing. 
Please install Python - or specify the PYTHON_EXECUTABLE CMake variable.") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Ensure that fat libraries are built correctly on Darwin diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index ea0aa0a259a22c..a5c32d94aea295 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -41,33 +41,19 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL endif() if (LIBCXX_STANDALONE_BUILD) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(WARNING "Failed to find python interpreter. " - "The libc++ test suite will be disabled.") - set(LLVM_INCLUDE_TESTS OFF) - else() - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) + set_target_properties(Python3::Interpreter PROPERTIES + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() endif() diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index 7dae682cdef07a..34a7a68da42c50 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -57,38 +57,19 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) include(CheckAtomic) if(LLVM_INCLUDE_TESTS) - if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if(NOT PYTHONINTERP_FOUND) - message(FATAL_ERROR - "Unable to find 
Python interpreter, required for testing. - - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if(${PYTHON_VERSION_STRING} VERSION_LESS 2.7) - message(FATAL_ERROR "Python 2.7 or newer is required") + find_package(Python3 COMPONENTS Interpreter) + if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() - add_executable(Python3::Interpeter IMPORTED) + # Treat python2 as python3 + add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) - else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() # Check prebuilt llvm/utils. diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 4a7639c51121d9..410103b0bfd687 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -696,38 +696,19 @@ option(LLVM_ENABLE_PLUGINS "Enable plugin support" ${LLVM_ENABLE_PLUGINS_default include(HandleLLVMOptions) -if(CMAKE_VERSION VERSION_LESS 3.12) - include(FindPythonInterp) - if( NOT PYTHONINTERP_FOUND ) - message(FATAL_ERROR - "Unable to find Python interpreter, required for builds and testing. 
- - Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") - endif() - - if( ${PYTHON_VERSION_STRING} VERSION_LESS 2.7 ) - message(FATAL_ERROR "Python 2.7 or newer is required") +find_package(Python3 COMPONENTS Interpreter) +if(NOT Python3_Interpreter_FOUND) + message(WARNING "Python3 not found, using python2 as a fallback") + find_package(Python2 COMPONENTS Interpreter REQUIRED) + if(Python2_VERSION VERSION_LESS 2.7) + message(SEND_ERROR "Python 2.7 or newer is required") endif() + # Treat python2 as python3 add_executable(Python3::Interpreter IMPORTED) set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${PYTHON_EXECUTABLE}) - set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE}) -else() - find_package(Python3 COMPONENTS Interpreter) - if(NOT Python3_Interpreter_FOUND) - message(WARNING "Python3 not found, using python2 as a fallback") - find_package(Python2 COMPONENTS Interpreter REQUIRED) - if(Python2_VERSION VERSION_LESS 2.7) - message(SEND_ERROR "Python 2.7 or newer is required") - endif() - - # Treat python2 as python3 - add_executable(Python3::Interpreter IMPORTED) - set_target_properties(Python3::Interpreter PROPERTIES - IMPORTED_LOCATION ${Python2_EXECUTABLE}) - set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) - endif() + IMPORTED_LOCATION ${Python2_EXECUTABLE}) + set(Python3_EXECUTABLE ${Python2_EXECUTABLE}) endif() ###### From e67405141836fcd88183863758eeb42f32e847a6 Mon Sep 17 00:00:00 2001 From: Denys Petrov Date: Fri, 4 Sep 2020 15:03:09 +0300 Subject: [PATCH 072/161] [analyzer] [NFC] Introduce refactoring of PthreadLockChecker Change capitalization of some names due to LLVM naming rules. Change names of some variables to make them more speaking. Rework similar bug reports into one common function. Prepare code for the next patches to reduce unrelated changes. Differential Revision: https://reviews.llvm.org/D87138 --- .../Checkers/PthreadLockChecker.cpp | 271 ++++++++---------- 1 file changed, 118 insertions(+), 153 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp index 285d2da104f1ac..88e80c481a5a7f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/PthreadLockChecker.cpp @@ -83,7 +83,7 @@ class PthreadLockChecker : public Checker PThreadCallbacks = { // Init. {{"pthread_mutex_init", 2}, &PthreadLockChecker::InitAnyLock}, @@ -167,46 +167,49 @@ class PthreadLockChecker : public Checker BT[], + const Expr *MtxExpr, CheckerKind CheckKind, + StringRef Desc) const; // Init. void InitAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void InitLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal Lock, CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void InitLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, + CheckerKind CheckKind) const; // Lock, Try-lock. 
void AcquirePthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void AcquireXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryFuchsiaLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void TryC11Lock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void AcquireLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal lock, bool isTryLock, LockingSemantics semantics, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void AcquireLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, bool IsTryLock, + LockingSemantics Semantics, CheckerKind CheckKind) const; // Release. void ReleaseAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void ReleaseLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal lock, CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void ReleaseLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, + CheckerKind CheckKind) const; // Destroy. void DestroyPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; void DestroyXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkkind) const; - void DestroyLockAux(const CallEvent &Call, CheckerContext &C, unsigned ArgNo, - SVal Lock, LockingSemantics semantics, - CheckerKind checkkind) const; + CheckerKind CheckKind) const; + void DestroyLockAux(const CallEvent &Call, CheckerContext &C, + const Expr *MtxExpr, SVal MtxVal, + LockingSemantics Semantics, CheckerKind CheckKind) const; public: void checkPostCall(const CallEvent &Call, CheckerContext &C) const; @@ -226,18 +229,18 @@ class PthreadLockChecker : public Checker BT_initlock[CK_NumCheckKinds]; mutable std::unique_ptr BT_lor[CK_NumCheckKinds]; - void initBugType(CheckerKind checkKind) const { - if (BT_doublelock[checkKind]) + void initBugType(CheckerKind CheckKind) const { + if (BT_doublelock[CheckKind]) return; - BT_doublelock[checkKind].reset( - new BugType{CheckNames[checkKind], "Double locking", "Lock checker"}); - BT_doubleunlock[checkKind].reset( - new BugType{CheckNames[checkKind], "Double unlocking", "Lock checker"}); - BT_destroylock[checkKind].reset(new BugType{ - CheckNames[checkKind], "Use destroyed lock", "Lock checker"}); - BT_initlock[checkKind].reset(new BugType{ - CheckNames[checkKind], "Init invalid lock", "Lock checker"}); - BT_lor[checkKind].reset(new BugType{CheckNames[checkKind], + BT_doublelock[CheckKind].reset( + new BugType{CheckNames[CheckKind], "Double locking", "Lock checker"}); + BT_doubleunlock[CheckKind].reset( + new BugType{CheckNames[CheckKind], "Double unlocking", "Lock checker"}); + BT_destroylock[CheckKind].reset(new BugType{ + CheckNames[CheckKind], "Use destroyed lock", "Lock checker"}); + BT_initlock[CheckKind].reset(new BugType{ + CheckNames[CheckKind], "Init invalid lock", "Lock checker"}); + BT_lor[CheckKind].reset(new BugType{CheckNames[CheckKind], "Lock order reversal", "Lock checker"}); } }; @@ -341,53 +344,53 @@ void 
PthreadLockChecker::printState(raw_ostream &Out, ProgramStateRef State, void PthreadLockChecker::AcquirePthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false, + PthreadSemantics, CheckKind); } void PthreadLockChecker::AcquireXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), false, XNUSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), false, + XNUSemantics, CheckKind); } void PthreadLockChecker::TryPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryFuchsiaLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::TryC11Lock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - AcquireLockAux(Call, C, 0, Call.getArgSVal(0), true, PthreadSemantics, - checkKind); + CheckerKind CheckKind) const { + AcquireLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), true, + PthreadSemantics, CheckKind); } void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal lock, bool isTryLock, - enum LockingSemantics semantics, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, bool IsTryLock, + enum LockingSemantics Semantics, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *lockR = lock.getAsRegion(); + const MemRegion *lockR = MtxVal.getAsRegion(); if (!lockR) return; @@ -398,28 +401,23 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, if (const LockState *LState = state->get(lockR)) { if (LState->isLocked()) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto report = std::make_unique( - *BT_doublelock[checkKind], "This lock has already been acquired", N); - report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(report)); + reportBug(C, BT_doublelock, MtxExpr, CheckKind, + "This lock has already been acquired"); return; } else if (LState->isDestroyed()) { - reportUseDestroyedBug(Call, C, ArgNo, checkKind); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, + "This lock has already been destroyed"); return; } } ProgramStateRef lockSucc = state; - if (isTryLock) { + if (IsTryLock) { // Bifurcate the state, and allow a mode where the lock acquisition fails. 
SVal RetVal = Call.getReturnValue(); if (auto DefinedRetVal = RetVal.getAs()) { ProgramStateRef lockFail; - switch (semantics) { + switch (Semantics) { case PthreadSemantics: std::tie(lockFail, lockSucc) = state->assume(*DefinedRetVal); break; @@ -434,7 +432,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, } // We might want to handle the case when the mutex lock function was inlined // and returned an Unknown or Undefined value. - } else if (semantics == PthreadSemantics) { + } else if (Semantics == PthreadSemantics) { // Assume that the return value was 0. SVal RetVal = Call.getReturnValue(); if (auto DefinedRetVal = RetVal.getAs()) { @@ -447,7 +445,7 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, // and returned an Unknown or Undefined value. } else { // XNU locking semantics return void on non-try locks - assert((semantics == XNUSemantics) && "Unknown locking semantics"); + assert((Semantics == XNUSemantics) && "Unknown locking semantics"); lockSucc = state; } @@ -459,18 +457,18 @@ void PthreadLockChecker::AcquireLockAux(const CallEvent &Call, void PthreadLockChecker::ReleaseAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - ReleaseLockAux(Call, C, 0, Call.getArgSVal(0), checkKind); + CheckerKind CheckKind) const { + ReleaseLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind); } void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal lock, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *lockR = lock.getAsRegion(); + const MemRegion *lockR = MtxVal.getAsRegion(); if (!lockR) return; @@ -481,18 +479,12 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, if (const LockState *LState = state->get(lockR)) { if (LState->isUnlocked()) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_doubleunlock[checkKind], "This lock has already been unlocked", - N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_doubleunlock, MtxExpr, CheckKind, + "This lock has already been unlocked"); return; } else if (LState->isDestroyed()) { - reportUseDestroyedBug(Call, C, ArgNo, checkKind); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, + "This lock has already been destroyed"); return; } } @@ -502,17 +494,9 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, if (!LS.isEmpty()) { const MemRegion *firstLockR = LS.getHead(); if (firstLockR != lockR) { - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto report = std::make_unique( - *BT_lor[checkKind], - "This was not the most recently acquired lock. Possible " - "lock order reversal", - N); - report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(report)); + reportBug(C, BT_lor, MtxExpr, CheckKind, + "This was not the most recently acquired lock. Possible lock " + "order reversal"); return; } // Record that the lock was released. 
@@ -525,25 +509,27 @@ void PthreadLockChecker::ReleaseLockAux(const CallEvent &Call, void PthreadLockChecker::DestroyPthreadLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - DestroyLockAux(Call, C, 0, Call.getArgSVal(0), PthreadSemantics, checkKind); + CheckerKind CheckKind) const { + DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), + PthreadSemantics, CheckKind); } void PthreadLockChecker::DestroyXNULock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - DestroyLockAux(Call, C, 0, Call.getArgSVal(0), XNUSemantics, checkKind); + CheckerKind CheckKind) const { + DestroyLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), XNUSemantics, + CheckKind); } void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, - CheckerContext &C, unsigned ArgNo, - SVal Lock, - enum LockingSemantics semantics, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + CheckerContext &C, const Expr *MtxExpr, + SVal MtxVal, + enum LockingSemantics Semantics, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *LockR = Lock.getAsRegion(); + const MemRegion *LockR = MtxVal.getAsRegion(); if (!LockR) return; @@ -556,7 +542,7 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, const LockState *LState = State->get(LockR); // Checking the return value of the destroy method only in the case of // PthreadSemantics - if (semantics == PthreadSemantics) { + if (Semantics == PthreadSemantics) { if (!LState || LState->isUnlocked()) { SymbolRef sym = Call.getReturnValue().getAsSymbol(); if (!sym) { @@ -581,36 +567,26 @@ void PthreadLockChecker::DestroyLockAux(const CallEvent &Call, return; } } - StringRef Message; - if (LState->isLocked()) { - Message = "This lock is still locked"; - } else { - Message = "This lock has already been destroyed"; - } + StringRef Message = LState->isLocked() + ? "This lock is still locked" + : "This lock has already been destroyed"; - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_destroylock[checkKind], Message, N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_destroylock, MtxExpr, CheckKind, Message); } void PthreadLockChecker::InitAnyLock(const CallEvent &Call, CheckerContext &C, - CheckerKind checkKind) const { - InitLockAux(Call, C, 0, Call.getArgSVal(0), checkKind); + CheckerKind CheckKind) const { + InitLockAux(Call, C, Call.getArgExpr(0), Call.getArgSVal(0), CheckKind); } void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C, - unsigned ArgNo, SVal Lock, - CheckerKind checkKind) const { - if (!ChecksEnabled[checkKind]) + const Expr *MtxExpr, SVal MtxVal, + CheckerKind CheckKind) const { + if (!ChecksEnabled[CheckKind]) return; - const MemRegion *LockR = Lock.getAsRegion(); + const MemRegion *LockR = MtxVal.getAsRegion(); if (!LockR) return; @@ -627,35 +603,24 @@ void PthreadLockChecker::InitLockAux(const CallEvent &Call, CheckerContext &C, return; } - StringRef Message; - - if (LState->isLocked()) { - Message = "This lock is still being held"; - } else { - Message = "This lock has already been initialized"; - } + StringRef Message = LState->isLocked() + ? 
"This lock is still being held" + : "This lock has already been initialized"; - ExplodedNode *N = C.generateErrorNode(); - if (!N) - return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_initlock[checkKind], Message, N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); - C.emitReport(std::move(Report)); + reportBug(C, BT_initlock, MtxExpr, CheckKind, Message); } -void PthreadLockChecker::reportUseDestroyedBug(const CallEvent &Call, - CheckerContext &C, - unsigned ArgNo, - CheckerKind checkKind) const { +void PthreadLockChecker::reportBug(CheckerContext &C, + std::unique_ptr BT[], + const Expr *MtxExpr, CheckerKind CheckKind, + StringRef Desc) const { ExplodedNode *N = C.generateErrorNode(); if (!N) return; - initBugType(checkKind); - auto Report = std::make_unique( - *BT_destroylock[checkKind], "This lock has already been destroyed", N); - Report->addRange(Call.getArgExpr(ArgNo)->getSourceRange()); + initBugType(CheckKind); + auto Report = + std::make_unique(*BT[CheckKind], Desc, N); + Report->addRange(MtxExpr->getSourceRange()); C.emitReport(std::move(Report)); } From 4964d75d7078b932ac6b17c1990adaa6eada75c1 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 8 Sep 2020 09:17:01 -0400 Subject: [PATCH 073/161] [InstCombine] add bitwise logic fold tests for D86395; NFC --- llvm/test/Transforms/InstCombine/xor.ll | 74 +++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll index 312b0125f626f2..ba275a6066419d 100644 --- a/llvm/test/Transforms/InstCombine/xor.ll +++ b/llvm/test/Transforms/InstCombine/xor.ll @@ -1171,3 +1171,77 @@ define i8 @not_ashr_wrong_const(i8 %x) { %r = xor i8 %a, -2 ret i8 %r } + +; (~A & B) ^ A --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. + +define i32 @test52(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test52( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %r, %a + ret i32 %z +} + +; (~B & A) ^ B --> (A | B) +; The division ops are here to thwart complexity-based canonicalization: all ops are binops. 
+ +define i32 @test53(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test53( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[B]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[A]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[B]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %b, -1 + %r = and i32 %o, %a + %z = xor i32 %r, %b + ret i32 %z +} + +define i32 @test54(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test54( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[R]], [[A]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %b, %o + %z = xor i32 %r, %a + ret i32 %z +} + +define i32 @test55(i32 %p1, i32 %p2) { +; CHECK-LABEL: @test55( +; CHECK-NEXT: [[A:%.*]] = udiv i32 42, [[P1:%.*]] +; CHECK-NEXT: [[B:%.*]] = udiv i32 42, [[P2:%.*]] +; CHECK-NEXT: [[O:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[R:%.*]] = and i32 [[B]], [[O]] +; CHECK-NEXT: [[Z:%.*]] = xor i32 [[A]], [[R]] +; CHECK-NEXT: ret i32 [[Z]] +; + %a = udiv i32 42, %p1 + %b = udiv i32 42, %p2 + %o = xor i32 %a, -1 + %r = and i32 %o, %b + %z = xor i32 %a, %r + ret i32 %z +} From 156b127945a8c923d141e608b7380427da024376 Mon Sep 17 00:00:00 2001 From: Frank Derry Wanye Date: Tue, 8 Sep 2020 09:35:14 -0400 Subject: [PATCH 074/161] Add a new altera check for structure packing and alignment. The altera struct pack align lint check finds structs that are inefficiently packed or aligned and recommends packing/aligning of the structs using the packed and aligned attributes as needed in a warning. --- clang-tools-extra/clang-tidy/CMakeLists.txt | 2 + .../clang-tidy/ClangTidyForceLinker.h | 5 + .../clang-tidy/altera/AlteraTidyModule.cpp | 39 +++++ .../clang-tidy/altera/CMakeLists.txt | 15 ++ .../altera/StructPackAlignCheck.cpp | 144 ++++++++++++++++++ .../clang-tidy/altera/StructPackAlignCheck.h | 41 +++++ clang-tools-extra/docs/ReleaseNotes.rst | 21 +++ .../checks/altera-struct-pack-align.rst | 54 +++++++ .../docs/clang-tidy/checks/list.rst | 1 + clang-tools-extra/docs/clang-tidy/index.rst | 1 + .../checkers/altera-struct-pack-align.cpp | 101 ++++++++++++ 11 files changed, 424 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp create mode 100644 clang-tools-extra/clang-tidy/altera/CMakeLists.txt create mode 100644 clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt index 02573534ccaef8..923976197ebe86 100644 --- a/clang-tools-extra/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/CMakeLists.txt @@ -46,6 +46,7 @@ endif() # If you add a check, also add it to ClangTidyForceLinker.h in this directory. 
add_subdirectory(android) add_subdirectory(abseil) +add_subdirectory(altera) add_subdirectory(boost) add_subdirectory(bugprone) add_subdirectory(cert) @@ -71,6 +72,7 @@ add_subdirectory(zircon) set(ALL_CLANG_TIDY_CHECKS clangTidyAndroidModule clangTidyAbseilModule + clangTidyAlteraModule clangTidyBoostModule clangTidyBugproneModule clangTidyCERTModule diff --git a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h index 1d6bd2a4fd6214..63e681f878db2d 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h +++ b/clang-tools-extra/clang-tidy/ClangTidyForceLinker.h @@ -20,6 +20,11 @@ extern volatile int AbseilModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED AbseilModuleAnchorDestination = AbseilModuleAnchorSource; +// This anchor is used to force the linker to link the AlteraModule. +extern volatile int AlteraModuleAnchorSource; +static int LLVM_ATTRIBUTE_UNUSED AlteraModuleAnchorDestination = + AlteraModuleAnchorSource; + // This anchor is used to force the linker to link the AndroidModule. extern volatile int AndroidModuleAnchorSource; static int LLVM_ATTRIBUTE_UNUSED AndroidModuleAnchorDestination = diff --git a/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp new file mode 100644 index 00000000000000..d91f67ac148565 --- /dev/null +++ b/clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp @@ -0,0 +1,39 @@ +//===--- AlteraTidyModule.cpp - clang-tidy --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../ClangTidy.h" +#include "../ClangTidyModule.h" +#include "../ClangTidyModuleRegistry.h" +#include "StructPackAlignCheck.h" + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace altera { + +class AlteraModule : public ClangTidyModule { +public: + void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck( + "altera-struct-pack-align"); + } +}; + +} // namespace altera + +// Register the AlteraTidyModule using this statically initialized variable. +static ClangTidyModuleRegistry::Add + X("altera-module", "Adds Altera FPGA OpenCL lint checks."); + +// This anchor is used to force the linker to link in the generated object file +// and thus register the AlteraModule. 
+volatile int AlteraModuleAnchorSource = 0; + +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt new file mode 100644 index 00000000000000..45131c1809a23d --- /dev/null +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -0,0 +1,15 @@ +set(LLVM_LINK_COMPONENTS support) + +add_clang_library(clangTidyAlteraModule + AlteraTidyModule.cpp + StructPackAlignCheck.cpp + + LINK_LIBS + clangAnalysis + clangAST + clangASTMatchers + clangBasic + clangLex + clangTidy + clangTidyUtils + ) diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp new file mode 100644 index 00000000000000..9f28a22a9d03ec --- /dev/null +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.cpp @@ -0,0 +1,144 @@ +//===--- StructPackAlignCheck.cpp - clang-tidy ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "StructPackAlignCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecordLayout.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include +#include + +using namespace clang::ast_matchers; + +namespace clang { +namespace tidy { +namespace altera { + +void StructPackAlignCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher(recordDecl(isStruct(), isDefinition(), + unless(isExpansionInSystemHeader())) + .bind("struct"), + this); +} + +CharUnits +StructPackAlignCheck::computeRecommendedAlignment(CharUnits MinByteSize) { + CharUnits NewAlign = CharUnits::fromQuantity(1); + if (!MinByteSize.isPowerOfTwo()) { + int MSB = (int)MinByteSize.getQuantity(); + for (; MSB > 0; MSB /= 2) { + NewAlign = NewAlign.alignTo( + CharUnits::fromQuantity(((int)NewAlign.getQuantity()) * 2)); + // Abort if the computed alignment meets the maximum configured alignment. + if (NewAlign.getQuantity() >= MaxConfiguredAlignment) + break; + } + } else { + NewAlign = MinByteSize; + } + return NewAlign; +} + +void StructPackAlignCheck::check(const MatchFinder::MatchResult &Result) { + const auto *Struct = Result.Nodes.getNodeAs("struct"); + + // Do not trigger on templated struct declarations because the packing and + // alignment requirements are unknown. + if (Struct->isTemplated()) + return; + + // Get sizing info for the struct. + llvm::SmallVector, 10> FieldSizes; + unsigned int TotalBitSize = 0; + for (const FieldDecl *StructField : Struct->fields()) { + // For each StructField, record how big it is (in bits). + // Would be good to use a pair of to advise a better + // packing order. + unsigned int StructFieldWidth = + (unsigned int)Result.Context + ->getTypeInfo(StructField->getType().getTypePtr()) + .Width; + FieldSizes.emplace_back(StructFieldWidth, StructField->getFieldIndex()); + // FIXME: Recommend a reorganization of the struct (sort by StructField + // size, largest to smallest). 
+ TotalBitSize += StructFieldWidth; + } + + uint64_t CharSize = Result.Context->getCharWidth(); + CharUnits CurrSize = Result.Context->getASTRecordLayout(Struct).getSize(); + CharUnits MinByteSize = + CharUnits::fromQuantity(ceil((float)TotalBitSize / CharSize)); + CharUnits MaxAlign = CharUnits::fromQuantity( + ceil((float)Struct->getMaxAlignment() / CharSize)); + CharUnits CurrAlign = + Result.Context->getASTRecordLayout(Struct).getAlignment(); + CharUnits NewAlign = computeRecommendedAlignment(MinByteSize); + + bool IsPacked = Struct->hasAttr(); + bool NeedsPacking = (MinByteSize < CurrSize) && (MaxAlign != NewAlign) && + (CurrSize != NewAlign); + bool NeedsAlignment = CurrAlign.getQuantity() != NewAlign.getQuantity(); + + if (!NeedsAlignment && !NeedsPacking) + return; + + // If it's using much more space than it needs, suggest packing. + // (Do not suggest packing if it is currently explicitly aligned to what the + // minimum byte size would suggest as the new alignment.) + if (NeedsPacking && !IsPacked) { + diag(Struct->getLocation(), + "accessing fields in struct %0 is inefficient due to padding; only " + "needs %1 bytes but is using %2 bytes") + << Struct << (int)MinByteSize.getQuantity() + << (int)CurrSize.getQuantity() + << FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1), + " __attribute__((packed))"); + diag(Struct->getLocation(), + "use \"__attribute__((packed))\" to reduce the amount of padding " + "applied to struct %0", + DiagnosticIDs::Note) + << Struct; + } + + FixItHint FixIt; + AlignedAttr *Attribute = Struct->getAttr(); + std::string NewAlignQuantity = std::to_string((int)NewAlign.getQuantity()); + if (Attribute) { + std::ostringstream FixItString; + FixItString << "aligned(" << NewAlignQuantity << ")"; + FixIt = + FixItHint::CreateReplacement(Attribute->getRange(), FixItString.str()); + } else { + std::ostringstream FixItString; + FixItString << " __attribute__((aligned(" << NewAlignQuantity << ")))"; + FixIt = FixItHint::CreateInsertion(Struct->getEndLoc().getLocWithOffset(1), + FixItString.str()); + } + + // And suggest the minimum power-of-two alignment for the struct as a whole + // (with and without packing). + if (NeedsAlignment) { + diag(Struct->getLocation(), + "accessing fields in struct %0 is inefficient due to poor alignment; " + "currently aligned to %1 bytes, but recommended alignment is %2 bytes") + << Struct << (int)CurrAlign.getQuantity() << NewAlignQuantity << FixIt; + + diag(Struct->getLocation(), + "use \"__attribute__((aligned(%0)))\" to align struct %1 to %0 bytes", + DiagnosticIDs::Note) + << NewAlignQuantity << Struct; + } +} + +void StructPackAlignCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "MaxConfiguredAlignment", MaxConfiguredAlignment); +} + +} // namespace altera +} // namespace tidy +} // namespace clang diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h new file mode 100644 index 00000000000000..b903641247e3c9 --- /dev/null +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h @@ -0,0 +1,41 @@ +//===--- StructPackAlignCheck.h - clang-tidy --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang { +namespace tidy { +namespace altera { + +/// Finds structs that are inefficiently packed or aligned, and recommends +/// packing and/or aligning of said structs as needed. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/altera-struct-pack-align.html +class StructPackAlignCheck : public ClangTidyCheck { +public: + StructPackAlignCheck(StringRef Name, ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + MaxConfiguredAlignment(Options.get("MaxConfiguredAlignment", 128)) {} + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + void storeOptions(ClangTidyOptions::OptionMap &Opts); + +private: + const unsigned MaxConfiguredAlignment; + CharUnits computeRecommendedAlignment(CharUnits MinByteSize); +}; + +} // namespace altera +} // namespace tidy +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ALTERA_STRUCTPACKALIGNCHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 781fef27c47617..53c3894914e528 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -67,6 +67,27 @@ The improvements are... Improvements to clang-tidy -------------------------- +New modules +^^^^^^^^^^^ + +- New :doc:`altera ` module. + + Includes checks related to OpenCL for FPGA coding guidelines, based on the + `Altera SDK for OpenCL: Best Practices Guide + `_. + +New checks +^^^^^^^^^^ + +- New :doc:`altera-struct-pack-align + ` check. + + Finds structs that are inefficiently packed or aligned, and recommends + packing and/or aligning of said structs as needed. + +- New :doc:`bugprone-misplaced-pointer-arithmetic-in-alloc + ` check. + - New :doc:`bugprone-redundant-branch-condition ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst new file mode 100644 index 00000000000000..b03a4fcf7fcf33 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/altera-struct-pack-align.rst @@ -0,0 +1,54 @@ +.. title:: clang-tidy - altera-struct-pack-align + +altera-struct-pack-align +======================== + +Finds structs that are inefficiently packed or aligned, and recommends +packing and/or aligning of said structs as needed. + +Structs that are not packed take up more space than they should, and accessing +structs that are not well aligned is inefficient. + +Fix-its are provided to fix both of these issues by inserting and/or amending +relevant struct attributes. + +Based on the `Altera SDK for OpenCL: Best Practices Guide +`_. + +.. code-block:: c++ + + // The following struct is originally aligned to 4 bytes, and thus takes up + // 12 bytes of memory instead of 10. Packing the struct will make it use + // only 10 bytes of memory, and aligning it to 16 bytes will make it + // efficient to access. + struct example { + char a; // 1 byte + double b; // 8 bytes + char c; // 1 byte + }; + + // The following struct is arranged in such a way that packing is not needed. 
+ // However, it is aligned to 4 bytes instead of 8, and thus needs to be + // explicitly aligned. + struct implicitly_packed_example { + char a; // 1 byte + char b; // 1 byte + char c; // 1 byte + char d; // 1 byte + int e; // 4 bytes + }; + + // The following struct is explicitly aligned and packed. + struct good_example { + char a; // 1 byte + double b; // 8 bytes + char c; // 1 byte + } __attribute__((packed)) __attribute__((aligned(16)); + + // Explicitly aligning a struct to the wrong value will result in a warning. + // The following example should be aligned to 16 bytes, not 32. + struct badly_aligned_example { + char a; // 1 byte + double b; // 8 bytes + char c; // 1 byte + } __attribute__((packed)) __attribute__((aligned(32))); diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 91414ee8c90f32..c569ce704d979e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -30,6 +30,7 @@ Clang-Tidy Checks `abseil-time-comparison `_, "Yes" `abseil-time-subtraction `_, "Yes" `abseil-upgrade-duration-conversions `_, "Yes" + `altera-struct-pack-align `_, `android-cloexec-accept `_, "Yes" `android-cloexec-accept4 `_, `android-cloexec-creat `_, "Yes" diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst index b9a4a7d694b4f0..a85c721541784a 100644 --- a/clang-tools-extra/docs/clang-tidy/index.rst +++ b/clang-tools-extra/docs/clang-tidy/index.rst @@ -58,6 +58,7 @@ There are currently the following groups of checks: Name prefix Description ====================== ========================================================= ``abseil-`` Checks related to Abseil library. +``altera-`` Checks related to OpenCL programming for FPGAs. ``android-`` Checks related to Android. ``boost-`` Checks related to Boost library. ``bugprone-`` Checks that target bugprone code constructs. 
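The fix-its suggested by this check follow from two quantities computed in StructPackAlignCheck.cpp above: the minimum size the struct could occupy if packed (the sum of its field bit widths, rounded up to whole bytes) and the recommended alignment returned by computeRecommendedAlignment, which the examples above imply is the smallest power of two at or above that packed size (10 bytes -> 16, 8 bytes -> 8). The sketch below reproduces only that arithmetic for illustration; the helper names are invented here, it is not the checker's implementation, and the real check additionally honours the MaxConfiguredAlignment option (default 128).

  #include <cstdint>

  // Packed size in bytes: sum of field bit widths, rounded up to whole bytes.
  constexpr uint64_t minPackedBytes(uint64_t TotalBitSize, uint64_t CharWidth = 8) {
    return (TotalBitSize + CharWidth - 1) / CharWidth;
  }

  // Recommended alignment: smallest power of two that is >= the packed size.
  constexpr uint64_t recommendedAlignment(uint64_t MinByteSize) {
    uint64_t Align = 1;
    while (Align < MinByteSize)
      Align *= 2;
    return Align;
  }

  // char + double + char: 10 bytes when packed, recommended 16-byte alignment.
  static_assert(minPackedBytes(2 * 8 + 64) == 10, "packed size");
  static_assert(recommendedAlignment(10) == 16, "recommended alignment");

On this model the struct error case warns twice, once for size (24 bytes used versus 10 needed) and once for alignment (8 versus 16 bytes), which matches the CHECK-MESSAGES lines in the lit test that follows.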
diff --git a/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp new file mode 100644 index 00000000000000..615b6cafe87a25 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/altera-struct-pack-align.cpp @@ -0,0 +1,101 @@ +// RUN: %check_clang_tidy %s altera-struct-pack-align %t -- -header-filter=.* + +// Struct needs both alignment and packing +struct error { + char a; + double b; + char c; +}; +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error' is inefficient due to padding; only needs 10 bytes but is using 24 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((packed))" to reduce the amount of padding applied to struct 'error' +// CHECK-MESSAGES: :[[@LINE-7]]:8: warning: accessing fields in struct 'error' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-8]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error' to 16 bytes +// CHECK-FIXES: __attribute__((packed)) +// CHECK-FIXES: __attribute__((aligned(16))); + +// Struct is explicitly packed, but needs alignment +struct error_packed { + char a; + double b; + char c; +} __attribute__((packed)); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'error_packed' is inefficient due to poor alignment; currently aligned to 1 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'error_packed' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))) + +// Struct is properly packed, but needs alignment +struct align_only { + char a; + char b; + char c; + char d; + int e; + double f; +}; +// CHECK-MESSAGES: :[[@LINE-8]]:8: warning: accessing fields in struct 'align_only' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-9]]:8: note: use "__attribute__((aligned(16)))" to align struct 'align_only' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +// Struct is perfectly packed but wrongly aligned +struct bad_align { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(8))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align' is inefficient due to poor alignment; currently aligned to 8 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +struct bad_align2 { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(32))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align2' is inefficient due to poor alignment; currently aligned to 32 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align2' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +struct bad_align3 { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(4))); +// CHECK-MESSAGES: :[[@LINE-5]]:8: warning: accessing fields in struct 'bad_align3' is inefficient due to poor alignment; 
currently aligned to 4 bytes, but recommended alignment is 16 bytes [altera-struct-pack-align] +// CHECK-MESSAGES: :[[@LINE-6]]:8: note: use "__attribute__((aligned(16)))" to align struct 'bad_align3' to 16 bytes +// CHECK-FIXES: __attribute__((aligned(16))); + +// Struct is both perfectly packed and aligned +struct success { + char a; + double b; + char c; +} __attribute__((packed)) __attribute__((aligned(16))); +//Should take 10 bytes and be aligned to 16 bytes + +// Struct is properly packed, and explicitly aligned +struct success2 { + int a; + int b; + int c; +} __attribute__((aligned(16))); + +// If struct is properly aligned, packing not needed +struct success3 { + char a; + double b; + char c; +} __attribute__((aligned(16))); + +// If struct is templated, warnings should not be triggered +template +struct success4 { + A a; + B b; + int c; +}; + +// Warnings should not trigger on struct instantiations +void no_trigger_on_instantiation() { + struct bad_align3 instantiated { 'a', 0.001, 'b' }; +} + From 9c9974c3ccb6468cc83f759240293538cf123fcd Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 8 Sep 2020 15:34:52 +0200 Subject: [PATCH 075/161] [clang] Limit the maximum level of fold-expr expansion. Introduce a new diagnostic, and respect the bracket-depth (256) by default. Differential Revision: https://reviews.llvm.org/D86936 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 +++ clang/lib/Sema/TreeTransform.h | 13 +++++++++++++ clang/test/SemaCXX/fold_expr_expansion_limit.cpp | 9 +++++++++ 3 files changed, 25 insertions(+) create mode 100644 clang/test/SemaCXX/fold_expr_expansion_limit.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e1601da74b735a..ec0c0fd9fa8ceb 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5092,6 +5092,9 @@ def err_fold_expression_empty : Error< "with no fallback value">; def err_fold_expression_bad_operand : Error< "expression not permitted as operand of fold expression">; +def err_fold_expression_limit_exceeded: Error< + "instantiating fold expression with %0 arguments exceeded expression nesting " + "limit of %1">, DefaultFatal, NoSFINAE; def err_unexpected_typedef : Error< "unexpected type name %0: expected expression">; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 4c8293f3bf4c00..6457b192477e37 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -28,6 +28,7 @@ #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/DiagnosticParse.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Sema/Designator.h" #include "clang/Sema/Lookup.h" @@ -13193,6 +13194,18 @@ TreeTransform::TransformCXXFoldExpr(CXXFoldExpr *E) { E->getEllipsisLoc(), RHS.get(), E->getEndLoc(), NumExpansions); } + // Formally a fold expression expands to nested parenthesized expressions. + // Enforce this limit to avoid creating trees so deep we can't safely traverse + // them. + if (NumExpansions && SemaRef.getLangOpts().BracketDepth < NumExpansions) { + SemaRef.Diag(E->getEllipsisLoc(), + clang::diag::err_fold_expression_limit_exceeded) + << *NumExpansions << SemaRef.getLangOpts().BracketDepth + << E->getSourceRange(); + SemaRef.Diag(E->getEllipsisLoc(), diag::note_bracket_depth); + return ExprError(); + } + // The transform has determined that we should perform an elementwise // expansion of the pattern. 
Do so. ExprResult Result = getDerived().TransformExpr(E->getInit()); diff --git a/clang/test/SemaCXX/fold_expr_expansion_limit.cpp b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp new file mode 100644 index 00000000000000..600278da78287c --- /dev/null +++ b/clang/test/SemaCXX/fold_expr_expansion_limit.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fsyntax-only -fbracket-depth 2 -verify -std=c++17 %s + +template struct seq { + constexpr bool zero() { return (true && ... && (V == 0)); }; // expected-error {{instantiating fold expression with 3 arguments exceeded expression nesting limit of 2}} \ + expected-note {{use -fbracket-depth}} +}; +constexpr unsigned N = 3; +auto x = __make_integer_seq{}; +static_assert(!x.zero(), ""); // expected-note {{in instantiation of member function}} From 51d30c3429fa0f46bf8c0e4a38840952c11be4f9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 8 Sep 2020 15:40:14 +0200 Subject: [PATCH 076/161] [mlir][VectorOps] Fix more GCC5 weirdness VectorToSCF.cpp:515:47: error: specialization of 'template mlir::LogicalResult mlir::VectorTransferRewriter::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&) const' in different namespace [-fpermissive] --- mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 0a74472a49f6e2..c0d283d7af451b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -497,6 +497,8 @@ static void emitWithBoundsChecks( inBoundsFun(scalarAccessExprs); } +namespace mlir { + /// Lowers TransferReadOp into a combination of: /// 1. local memory allocation; /// 2. perfect loop nest over: @@ -666,8 +668,6 @@ LogicalResult VectorTransferRewriter::matchAndRewrite( return success(); } -namespace mlir { - void populateVectorToSCFConversionPatterns( OwningRewritePatternList &patterns, MLIRContext *context, const VectorTransferToSCFOptions &options) { From 94cfbef0a74ec3e5490878dc417fea5ecfcf2a6a Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Tue, 8 Sep 2020 14:41:42 +0100 Subject: [PATCH 077/161] [NFC][ARM] Precommit test --- .../Thumb2/LowOverheadLoops/remat-vctp.ll | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll new file mode 100644 index 00000000000000..9178217a89e92a --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m -mattr=+mve.fp %s -o - | FileCheck %s + +define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { +; CHECK-LABEL: remat_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd lr, r12, [sp, #80] +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: .LBB0_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: subs.w r12, r12, #4 +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; 
CHECK-NEXT: vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [lr, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: bgt .LBB0_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r7, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %i12, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, + %i23 = lshr <4 x i32> %i21, + %i24 = and <4 x i32> %i23, + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %i12) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %i12) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = 
getelementptr inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +declare <4 x i1> @llvm.arm.mve.vctp32(i32) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>) +declare <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32>) +declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32) +declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32) +declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) From c7b7c32f4a25d15e992215c8524871bef47d959b Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 4 Sep 2020 16:44:58 +0100 Subject: [PATCH 078/161] [DSE,MemorySSA] Increase walker limit a bit. This slightly bumps the walker limit so that it covers more cases while not increasing compile-time too much: http://llvm-compile-time-tracker.com/compare.php?from=0fc1c2b51ba0cfb9145139af35be638333865251&to=91144a50ea4fa82c0c877e77784f60371640b263&stat=instructions --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 49e811b298a605..892ba559e7903c 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -114,9 +114,9 @@ static cl::opt cl::desc("The number of memory instructions to scan for " "dead store elimination (default = 100)")); static cl::opt MemorySSAUpwardsStepLimit( - "dse-memoryssa-walklimit", cl::init(70), cl::Hidden, + "dse-memoryssa-walklimit", cl::init(90), cl::Hidden, cl::desc("The maximum number of steps while walking upwards to find " - "MemoryDefs that may be killed (default = 70)")); + "MemoryDefs that may be killed (default = 90)")); static cl::opt MemorySSAPartialStoreLimit( "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden, From e09e1d97c112ef9488b2f88db560d3d459c0652e Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 8 Sep 2020 10:00:24 -0400 Subject: [PATCH 079/161] [gn build] (manually) port 156b127945a8 --- .../clang-tools-extra/clang-tidy/BUILD.gn | 1 + .../clang-tidy/altera/BUILD.gn | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 81c9ec0ede11fe..18aa728b0db900 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -42,6 +42,7 @@ group("all-checks") { # If you add a check, also add it to ClangTidyForceLinker.h. 
deps = [ "//clang-tools-extra/clang-tidy/abseil", + "//clang-tools-extra/clang-tidy/altera", "//clang-tools-extra/clang-tidy/android", "//clang-tools-extra/clang-tidy/boost", "//clang-tools-extra/clang-tidy/bugprone", diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn new file mode 100644 index 00000000000000..52f2e3d5f23d68 --- /dev/null +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/altera/BUILD.gn @@ -0,0 +1,18 @@ +static_library("altera") { + output_name = "clangTidyAlteraModule" + configs += [ "//llvm/utils/gn/build:clang_code" ] + deps = [ + "//clang-tools-extra/clang-tidy", + "//clang-tools-extra/clang-tidy/utils", + "//clang/lib/AST", + "//clang/lib/ASTMatchers", + "//clang/lib/Analysis", + "//clang/lib/Basic", + "//clang/lib/Lex", + "//llvm/lib/Support", + ] + sources = [ + "AlteraTidyModule.cpp", + "StructPackAlignCheck.cpp", + ] +} From 9933188c90615c9c264ebb69117f09726e909a25 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 8 Sep 2020 10:02:00 -0400 Subject: [PATCH 080/161] StructPackAlignCheck: Fix a -Winconsistent-missing-override warning --- clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h index b903641247e3c9..510e03030590c0 100644 --- a/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h +++ b/clang-tools-extra/clang-tidy/altera/StructPackAlignCheck.h @@ -27,7 +27,7 @@ class StructPackAlignCheck : public ClangTidyCheck { MaxConfiguredAlignment(Options.get("MaxConfiguredAlignment", 128)) {} void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; - void storeOptions(ClangTidyOptions::OptionMap &Opts); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; private: const unsigned MaxConfiguredAlignment; From 2d9d270e77918dfc19ad9b3150ee7d40eeb8ca79 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 8 Sep 2020 16:09:33 +0200 Subject: [PATCH 081/161] Revert 3e782bf809 "[Sema][MSVC] warn at dynamic_cast when /GR- is given" This caused more warnings than expected, see https://crbug.com/1126019 Also reverts the follow-up 7907e5516. > Differential Revision: https://reviews.llvm.org/D86369 --- clang/include/clang/Basic/DiagnosticGroups.td | 2 -- .../clang/Basic/DiagnosticSemaKinds.td | 6 ------ clang/lib/Sema/SemaCast.cpp | 12 ----------- clang/lib/Sema/SemaExprCXX.cpp | 6 ------ clang/test/SemaCXX/ms_no_dynamic_cast.cpp | 21 ------------------- clang/test/SemaCXX/no-rtti.cpp | 2 +- clang/test/SemaCXX/no_dynamic_cast.cpp | 21 ------------------- 7 files changed, 1 insertion(+), 69 deletions(-) delete mode 100644 clang/test/SemaCXX/ms_no_dynamic_cast.cpp delete mode 100644 clang/test/SemaCXX/no_dynamic_cast.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index a9bd52b8afcdfd..6b4dcc850612ed 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1235,5 +1235,3 @@ in addition with the pragmas or -fmax-tokens flag to get any warnings. 
} def WebAssemblyExceptionSpec : DiagGroup<"wasm-exception-spec">; - -def RTTI : DiagGroup<"rtti">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index ec0c0fd9fa8ceb..46f7ffc97ce779 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -7441,12 +7441,6 @@ def err_no_typeid_with_fno_rtti : Error< "use of typeid requires -frtti">; def err_no_dynamic_cast_with_fno_rtti : Error< "use of dynamic_cast requires -frtti">; -def warn_no_dynamic_cast_with_rtti_disabled: Warning< - "dynamic_cast will not work since RTTI data is disabled by " - "%select{-fno-rtti-data|/GR-}0">, InGroup; -def warn_no_typeid_with_rtti_disabled: Warning< - "typeid will not work since RTTI data is disabled by " - "%select{-fno-rtti-data|/GR-}0">, InGroup; def err_cannot_form_pointer_to_member_of_reference_type : Error< "cannot form a pointer-to-member to member %0 of reference type %1">; diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index b213fb756a6503..726900c59f20e4 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -890,18 +890,6 @@ void CastOperation::CheckDynamicCast() { return; } - // Warns when dynamic_cast is used with RTTI data disabled. - if (!Self.getLangOpts().RTTIData) { - bool MicrosoftABI = - Self.getASTContext().getTargetInfo().getCXXABI().isMicrosoft(); - bool isClangCL = Self.getDiagnostics().getDiagnosticOptions().getFormat() == - DiagnosticOptions::MSVC; - if (MicrosoftABI || !DestPointee->isVoidType()) - Self.Diag(OpRange.getBegin(), - diag::warn_no_dynamic_cast_with_rtti_disabled) - << isClangCL; - } - // Done. Everything else is run-time checks. Kind = CK_Dynamic; } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 8f8847e638040a..d1fcdf35452788 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -646,12 +646,6 @@ Sema::ActOnCXXTypeid(SourceLocation OpLoc, SourceLocation LParenLoc, return ExprError(Diag(OpLoc, diag::err_no_typeid_with_fno_rtti)); } - // Warns when typeid is used with RTTI data disabled. 
- if (!getLangOpts().RTTIData) - Diag(OpLoc, diag::warn_no_typeid_with_rtti_disabled) - << (getDiagnostics().getDiagnosticOptions().getFormat() == - DiagnosticOptions::MSVC); - QualType TypeInfoType = Context.getTypeDeclType(CXXTypeInfoDecl); if (isType) { diff --git a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp b/clang/test/SemaCXX/ms_no_dynamic_cast.cpp deleted file mode 100644 index d2c007fd8c297b..00000000000000 --- a/clang/test/SemaCXX/ms_no_dynamic_cast.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 %s -triple x86_64-windows -fdiagnostics-format msvc -fno-rtti-data -fsyntax-only -verify - -namespace std { -struct type_info {}; -} // namespace std -class B { -public: - virtual ~B() = default; -}; - -class D1 : public B { -public: - ~D1() = default; -}; - -void f() { - B* b = new D1(); - auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} - void* v = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by /GR-}} - (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by /GR-}} -} diff --git a/clang/test/SemaCXX/no-rtti.cpp b/clang/test/SemaCXX/no-rtti.cpp index f8487a0902dda2..e0b57153c24c9b 100644 --- a/clang/test/SemaCXX/no-rtti.cpp +++ b/clang/test/SemaCXX/no-rtti.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -fno-rtti %s +// RUN: %clang_cc1 -fsyntax-only -verify -fno-rtti %s namespace std { class type_info; diff --git a/clang/test/SemaCXX/no_dynamic_cast.cpp b/clang/test/SemaCXX/no_dynamic_cast.cpp deleted file mode 100644 index 074b02f4668bcf..00000000000000 --- a/clang/test/SemaCXX/no_dynamic_cast.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang_cc1 %s -triple x86_64-pc-linux-gnu -fno-rtti-data -fsyntax-only -verify - -namespace std { -struct type_info {}; -} // namespace std -class B { -public: - virtual ~B() = default; -}; - -class D1 : public B { -public: - ~D1() = default; -}; - -void f() { - B* b = new D1(); - auto d = dynamic_cast(b); // expected-warning{{dynamic_cast will not work since RTTI data is disabled by -fno-rtti-data}} - void* v = dynamic_cast(b); - (void)typeid(int); // expected-warning{{typeid will not work since RTTI data is disabled by -fno-rtti-data}} -} From 32ae37b038b16a1ff9c81428ae4f003377439a22 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 8 Sep 2020 16:26:48 +0200 Subject: [PATCH 082/161] [clang-tidy] Fix dynamic build failures after 156b127945a8c923d141e608b7380427da024376 --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt index 45131c1809a23d..878e718c659637 100644 --- a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -5,11 +5,15 @@ add_clang_library(clangTidyAlteraModule StructPackAlignCheck.cpp LINK_LIBS + clangTidy + clangTidyUtils + ) + +clang_target_link_libraries(clangTidyAlteraModule + PRIVATE clangAnalysis clangAST clangASTMatchers clangBasic clangLex - clangTidy - clangTidyUtils ) From 6dc3e22b575267d2ede36f741bb9eb2455f36cff Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Wed, 19 Aug 2020 12:01:03 +0200 Subject: [PATCH 083/161] [DAGTypeLegalizer] Handle ZERO_EXTEND of promoted type in WidenVecRes_Convert. 
On SystemZ, a ZERO_EXTEND of an i1 vector handled by WidenVecRes_Convert() always ended up being scalarized, because the type action of the input is promotion which was previously an unhandled case in this method. This fixes https://bugs.llvm.org/show_bug.cgi?id=47132. Differential Revision: https://reviews.llvm.org/D86268 Patch by Eli Friedman. Review: Ulrich Weigand --- .../SelectionDAG/LegalizeVectorTypes.cpp | 23 +++++++++++++++---- llvm/test/CodeGen/SystemZ/vec-zext.ll | 16 +++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 093f7b1680edd6..764472e570c047 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3307,19 +3307,34 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + LLVMContext &Ctx = *DAG.getContext(); SDValue InOp = N->getOperand(0); SDLoc DL(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); unsigned Opcode = N->getOpcode(); - unsigned InVTNumElts = InVT.getVectorNumElements(); const SDNodeFlags Flags = N->getFlags(); + + // Handle the case of ZERO_EXTEND where the promoted InVT element size does + // not equal that of WidenVT. + if (N->getOpcode() == ISD::ZERO_EXTEND && + getTypeAction(InVT) == TargetLowering::TypePromoteInteger && + TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() != + WidenVT.getScalarSizeInBits()) { + InOp = ZExtPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits()) + Opcode = ISD::TRUNCATE; + } + + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); diff --git a/llvm/test/CodeGen/SystemZ/vec-zext.ll b/llvm/test/CodeGen/SystemZ/vec-zext.ll index b4c8f2307b0b7a..cb61d31e5ebe31 100644 --- a/llvm/test/CodeGen/SystemZ/vec-zext.ll +++ b/llvm/test/CodeGen/SystemZ/vec-zext.ll @@ -92,3 +92,19 @@ define <8 x i16> @fun10(<8 x i8> %val1) { ret <8 x i16> %z } +define <2 x i32> @fun11(<2 x i64> %Arg1, <2 x i64> %Arg2) { +; CHECK-LABEL: fun11: +; CHECK: vgbm %v0, 0 +; CHECK-NEXT: vceqg %v1, %v24, %v0 +; CHECK-NEXT: vceqg %v0, %v26, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vrepig %v1, 1 +; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vpkg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %i3 = icmp eq <2 x i64> %Arg1, zeroinitializer + %i5 = icmp eq <2 x i64> %Arg2, zeroinitializer + %i6 = or <2 x i1> %i3, %i5 + %i7 = zext <2 x i1> %i6 to <2 x i32> + ret <2 x i32> %i7 +} From 6454140ab34cb29cc0b9de4f1e80199d717f1a97 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Sep 2020 11:17:10 -0400 Subject: [PATCH 084/161] [libc++] Make sure we always print all available features Previously, we'd only print the features added through the new config, however printing all the features is important for debugging purposes. 
--- libcxx/utils/libcxx/test/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index d54ee8fa32913e..82b696f76eec78 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -148,6 +148,8 @@ def configure(self): self.lit_config ) + self.lit_config.note("All available features: {}".format(self.config.available_features)) + def print_config_info(self): if self.cxx.use_modules: self.lit_config.note('Using modules flags: %s' % From c2f6a0012882ba9b39ccee53f3d7f4f1aedf2181 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 8 Sep 2020 11:29:32 -0400 Subject: [PATCH 085/161] [libc++] Allow overriding the cached value of LIBCXX_TEST_CONFIG --- libcxx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index a5c32d94aea295..8e7df5d19610e6 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -96,7 +96,7 @@ option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." OFF) option(LIBCXX_TEST_GDB_PRETTY_PRINTERS "Test gdb pretty printers." OFF) set(LIBCXX_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/configs/legacy.cfg.in" CACHE STRING - "The Lit testing configuration to use when running the tests." FORCE) # TODO: Stop using 'FORCE' once we can assume all CMake build dirs have been re-generated + "The Lit testing configuration to use when running the tests.") set(LIBCXX_TEST_PARAMS "" CACHE STRING "A list of parameters to run the Lit test suite with.") From c81dd3d159ab03d46e4280c458d3c29e56648218 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Tue, 8 Sep 2020 16:39:11 +0100 Subject: [PATCH 086/161] [clang-format] Handle shifts within conditions In some situation shifts can be treated as a template, and is thus formatted as one. So, by doing a couple extra checks to assure that the condition doesn't contain a template, and is in fact a bit shift should solve this problem. This is a fix for [[ https://bugs.llvm.org/show_bug.cgi?id=46969 | bug 46969 ]] Reviewed By: MyDeveloperDay Patch By: Saldivarcher Differential Revision: https://reviews.llvm.org/D86581 --- clang/lib/Format/TokenAnnotator.cpp | 20 +++++++++++++------- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 5dd6a7a9da40b0..841f0b41e9a7fc 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -56,6 +56,13 @@ static bool isLambdaParameterList(const FormatToken *Left) { Left->Previous->MatchingParen->is(TT_LambdaLSquare); } +/// Returns \c true if the token is followed by a boolean condition, \c false +/// otherwise. +static bool isKeywordWithCondition(const FormatToken &Tok) { + return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, + tok::kw_constexpr, tok::kw_catch); +} + /// A parser that gathers additional information about tokens. /// /// The \c TokenAnnotator tries to match parenthesis and square brakets and @@ -108,6 +115,12 @@ class AnnotatingParser { while (CurrentToken) { if (CurrentToken->is(tok::greater)) { + // Try to do a better job at looking for ">>" within the condition of + // a statement. 
+ if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) && + Left->ParentBracket != tok::less && + isKeywordWithCondition(*Line.First)) + return false; Left->MatchingParen = CurrentToken; CurrentToken->MatchingParen = Left; // In TT_Proto, we must distignuish between: @@ -2768,13 +2781,6 @@ bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const { Right.ParameterCount > 0); } -/// Returns \c true if the token is followed by a boolean condition, \c false -/// otherwise. -static bool isKeywordWithCondition(const FormatToken &Tok) { - return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch, - tok::kw_constexpr, tok::kw_catch); -} - bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, const FormatToken &Right) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index b198efa4af9ecd..98e002003159c9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -7565,6 +7565,21 @@ TEST_F(FormatTest, UnderstandsTemplateParameters) { verifyFormat("static_assert(is_convertible::value, \"AAA\");"); verifyFormat("Constructor(A... a) : a_(X{std::forward(a)}...) {}"); verifyFormat("< < < < < < < < < < < < < < < < < < < < < < < < < < < < < <"); + verifyFormat("some_templated_type"); +} + +TEST_F(FormatTest, UnderstandsShiftOperators) { + verifyFormat("if (i < x >> 1)"); + verifyFormat("while (i < x >> 1)"); + verifyFormat("for (unsigned i = 0; i < i; ++i, v = v >> 1)"); + verifyFormat("for (unsigned i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat( + "for (std::vector::iterator i = 0; i < x >> 1; ++i, v = v >> 1)"); + verifyFormat("Foo.call>()"); + verifyFormat("if (Foo.call>() == 0)"); + verifyFormat("for (std::vector>::iterator i = 0; i < x >> 1; " + "++i, v = v >> 1)"); + verifyFormat("if (w>, 1>::t)"); } TEST_F(FormatTest, BitshiftOperatorWidth) { From 487a80531006add8102d50dbcce4b6fd729ab1f6 Mon Sep 17 00:00:00 2001 From: Ronak Chauhan Date: Mon, 7 Sep 2020 14:40:00 +0530 Subject: [PATCH 087/161] [AMDGPU] Support disassembly for AMDGPU kernel descriptors Decode AMDGPU Kernel descriptors as assembler directives. 
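The patch below drives the decoding through a PRINT_DIRECTIVE macro: each .amdhsa_* directive is recovered from a 32-bit word of the kernel descriptor by masking out its bit-field and shifting it down to bit 0. Here is a minimal stand-alone sketch of that pattern; the mask value and field choice are illustrative only (the authoritative masks and shift amounts live in llvm/Support/AMDHSAKernelDescriptor.h), and this is not the disassembler's actual code.

  #include <cstdint>
  #include <iostream>

  // Illustrative layout: assume a single-bit flag at bit 21 of COMPUTE_PGM_RSRC1.
  // The real field definitions are in llvm/Support/AMDHSAKernelDescriptor.h.
  constexpr uint32_t FLAG_MASK = 1u << 21;
  constexpr uint32_t FLAG_SHIFT = 21;

  // Mirrors what PRINT_DIRECTIVE does: extract the field from the raw word and
  // print it as an assembler directive with its current value.
  void printDirective(uint32_t Rsrc1Word) {
    uint32_t Value = (Rsrc1Word & FLAG_MASK) >> FLAG_SHIFT;
    std::cout << "\t.amdhsa_dx10_clamp " << Value << '\n';
  }

  int main() {
    printDirective(1u << 21); // prints ".amdhsa_dx10_clamp 1"
    printDirective(0);        // prints ".amdhsa_dx10_clamp 0"
  }

Round-tripping works because these directives are exactly what the assembler consumes: the new lit tests further below assemble a kernel, disassemble the .kd symbol back into .amdhsa_* directives, re-assemble that output, and diff the two objects.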
Reviewed By: scott.linder, jhenderson, kzhuravl Differential Revision: https://reviews.llvm.org/D80713 --- .../llvm/Support/AMDHSAKernelDescriptor.h | 70 ++-- .../Disassembler/AMDGPUDisassembler.cpp | 345 ++++++++++++++++++ .../AMDGPU/Disassembler/AMDGPUDisassembler.h | 30 +- llvm/test/CodeGen/AMDGPU/nop-data.ll | 4 +- .../llvm-objdump/ELF/AMDGPU/kd-failure.s | 37 ++ .../tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s | 49 +++ .../tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s | 36 ++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s | 58 +++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s | 53 +++ .../llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s | 41 +++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 17 - 11 files changed, 690 insertions(+), 50 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h index d1c2147536a721..48a09ac48005df 100644 --- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h +++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -162,39 +162,49 @@ struct kernel_descriptor_t { uint8_t reserved2[6]; }; +enum : uint32_t { + GROUP_SEGMENT_FIXED_SIZE_OFFSET = 0, + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET = 4, + RESERVED0_OFFSET = 8, + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET = 16, + RESERVED1_OFFSET = 24, + COMPUTE_PGM_RSRC3_OFFSET = 44, + COMPUTE_PGM_RSRC1_OFFSET = 48, + COMPUTE_PGM_RSRC2_OFFSET = 52, + KERNEL_CODE_PROPERTIES_OFFSET = 56, + RESERVED2_OFFSET = 58, +}; + static_assert( sizeof(kernel_descriptor_t) == 64, "invalid size for kernel_descriptor_t"); -static_assert( - offsetof(kernel_descriptor_t, group_segment_fixed_size) == 0, - "invalid offset for group_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, private_segment_fixed_size) == 4, - "invalid offset for private_segment_fixed_size"); -static_assert( - offsetof(kernel_descriptor_t, reserved0) == 8, - "invalid offset for reserved0"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == 16, - "invalid offset for kernel_code_entry_byte_offset"); -static_assert( - offsetof(kernel_descriptor_t, reserved1) == 24, - "invalid offset for reserved1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44, - "invalid offset for compute_pgm_rsrc3"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48, - "invalid offset for compute_pgm_rsrc1"); -static_assert( - offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == 52, - "invalid offset for compute_pgm_rsrc2"); -static_assert( - offsetof(kernel_descriptor_t, kernel_code_properties) == 56, - "invalid offset for kernel_code_properties"); -static_assert( - offsetof(kernel_descriptor_t, reserved2) == 58, - "invalid offset for reserved2"); +static_assert(offsetof(kernel_descriptor_t, group_segment_fixed_size) == + GROUP_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for group_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, private_segment_fixed_size) == + PRIVATE_SEGMENT_FIXED_SIZE_OFFSET, + "invalid offset for private_segment_fixed_size"); +static_assert(offsetof(kernel_descriptor_t, reserved0) == 
RESERVED0_OFFSET, + "invalid offset for reserved0"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_entry_byte_offset) == + KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET, + "invalid offset for kernel_code_entry_byte_offset"); +static_assert(offsetof(kernel_descriptor_t, reserved1) == RESERVED1_OFFSET, + "invalid offset for reserved1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == + COMPUTE_PGM_RSRC3_OFFSET, + "invalid offset for compute_pgm_rsrc3"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == + COMPUTE_PGM_RSRC1_OFFSET, + "invalid offset for compute_pgm_rsrc1"); +static_assert(offsetof(kernel_descriptor_t, compute_pgm_rsrc2) == + COMPUTE_PGM_RSRC2_OFFSET, + "invalid offset for compute_pgm_rsrc2"); +static_assert(offsetof(kernel_descriptor_t, kernel_code_properties) == + KERNEL_CODE_PROPERTIES_OFFSET, + "invalid offset for kernel_code_properties"); +static_assert(offsetof(kernel_descriptor_t, reserved2) == RESERVED2_OFFSET, + "invalid offset for reserved2"); } // end namespace amdhsa } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 9c2f2e7eecd14f..840208169168e4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -34,6 +34,7 @@ #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/AMDHSAKernelDescriptor.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -1215,6 +1216,350 @@ bool AMDGPUDisassembler::isGFX10() const { return STI.getFeatureBits()[AMDGPU::FeatureGFX10]; } +//===----------------------------------------------------------------------===// +// AMDGPU specific symbol handling +//===----------------------------------------------------------------------===// +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + + // We cannot accurately backward compute #VGPRs used from + // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same + // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we + // simply calculate the inverse of what the assembler does. + + uint32_t GranulatedWorkitemVGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT; + + uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) * + AMDGPU::IsaInfo::getVGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n'; + + // We cannot backward compute values used to calculate + // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following + // directives can't be computed: + // .amdhsa_reserve_vcc + // .amdhsa_reserve_flat_scratch + // .amdhsa_reserve_xnack_mask + // They take their respective default values if not specified in the assembly. 
+ // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK) + // + // We compute the inverse as though all directives apart from NEXT_FREE_SGPR + // are set to 0. So while disassembling we consider that: + // + // GRANULATED_WAVEFRONT_SGPR_COUNT + // = f(NEXT_FREE_SGPR + 0 + 0 + 0) + // + // The disassembler cannot recover the original values of those 3 directives. + + uint32_t GranulatedWavefrontSGPRCount = + (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >> + COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT; + + if (isGFX10() && GranulatedWavefrontSGPRCount) + return MCDisassembler::Fail; + + uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) * + AMDGPU::IsaInfo::getSGPREncodingGranule(&STI); + + KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n'; + KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n'; + KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n"; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIORITY) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_float_round_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32); + PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64", + COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_PRIV) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_dx10_clamp", COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_DEBUG_MODE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_ieee_mode", COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_BULKY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL); + + if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0) + return MCDisassembler::Fail; + + if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode", + COMPUTE_PGM_RSRC1_WGP_MODE); + PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED); + PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS); + } + return MCDisassembler::Success; +} + +// NOLINTNEXTLINE(readability-identifier-naming) +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2( + uint32_t FourByteBuffer, raw_string_ostream &KdStream) const { + using namespace amdhsa; + StringRef Indent = "\t"; + PRINT_DIRECTIVE( + ".amdhsa_system_sgpr_private_segment_wavefront_offset", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z); + PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info", + COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO); + PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id", + COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH) + return MCDisassembler::Fail; + + if (FourByteBuffer & 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY) + return MCDisassembler::Fail; + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE) + return MCDisassembler::Fail; + + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_invalid_op", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION); + PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE); + PRINT_DIRECTIVE( + ".amdhsa_exception_fp_ieee_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW); + PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT); + PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero", + COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO); + + if (FourByteBuffer & COMPUTE_PGM_RSRC2_RESERVED0) + return MCDisassembler::Fail; + + return MCDisassembler::Success; +} + +#undef PRINT_DIRECTIVE + +MCDisassembler::DecodeStatus +AMDGPUDisassembler::decodeKernelDescriptorDirective( + DataExtractor::Cursor &Cursor, ArrayRef Bytes, + raw_string_ostream &KdStream) const { +#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \ + do { \ + KdStream << Indent << DIRECTIVE " " \ + << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \ + } while (0) + + uint16_t TwoByteBuffer = 0; + uint32_t FourByteBuffer = 0; + uint64_t EightByteBuffer = 0; + + StringRef ReservedBytes; + StringRef Indent = "\t"; + + assert(Bytes.size() == 64); + DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8); + + switch (Cursor.tell()) { + case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer + << '\n'; + return MCDisassembler::Success; + + case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + KdStream << Indent << ".amdhsa_private_segment_fixed_size " + << FourByteBuffer << '\n'; + return MCDisassembler::Success; + + case amdhsa::RESERVED0_OFFSET: + // 8 reserved bytes, must be 0. + EightByteBuffer = DE.getU64(Cursor); + if (EightByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET: + // KERNEL_CODE_ENTRY_BYTE_OFFSET + // So far no directive controls this for Code Object V3, so simply skip for + // disassembly. + DE.skip(Cursor, 8); + return MCDisassembler::Success; + + case amdhsa::RESERVED1_OFFSET: + // 20 reserved bytes, must be 0. + ReservedBytes = DE.getBytes(Cursor, 20); + for (int I = 0; I < 20; ++I) { + if (ReservedBytes[I] != 0) { + return MCDisassembler::Fail; + } + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC3_OFFSET: + // COMPUTE_PGM_RSRC3 + // - Only set for GFX10, GFX6-9 have this to be 0. + // - Currently no directives directly control this. 
+ FourByteBuffer = DE.getU32(Cursor); + if (!isGFX10() && FourByteBuffer) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC1_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::COMPUTE_PGM_RSRC2_OFFSET: + FourByteBuffer = DE.getU32(Cursor); + if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) == + MCDisassembler::Fail) { + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET: + using namespace amdhsa; + TwoByteBuffer = DE.getU16(Cursor); + + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT); + PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size", + KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0) + return MCDisassembler::Fail; + + // Reserved for GFX9 + if (isGFX9() && + (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) { + return MCDisassembler::Fail; + } else if (isGFX10()) { + PRINT_DIRECTIVE(".amdhsa_wavefront_size32", + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); + } + + if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) + return MCDisassembler::Fail; + + return MCDisassembler::Success; + + case amdhsa::RESERVED2_OFFSET: + // 6 bytes from here are reserved, must be 0. + ReservedBytes = DE.getBytes(Cursor, 6); + for (int I = 0; I < 6; ++I) { + if (ReservedBytes[I] != 0) + return MCDisassembler::Fail; + } + return MCDisassembler::Success; + + default: + llvm_unreachable("Unhandled index. Case statements cover everything."); + return MCDisassembler::Fail; + } +#undef PRINT_DIRECTIVE +} + +MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor( + StringRef KdName, ArrayRef Bytes, uint64_t KdAddress) const { + // CP microcode requires the kernel descriptor to be 64 aligned. + if (Bytes.size() != 64 || KdAddress % 64 != 0) + return MCDisassembler::Fail; + + std::string Kd; + raw_string_ostream KdStream(Kd); + KdStream << ".amdhsa_kernel " << KdName << '\n'; + + DataExtractor::Cursor C(0); + while (C && C.tell() < Bytes.size()) { + MCDisassembler::DecodeStatus Status = + decodeKernelDescriptorDirective(C, Bytes, KdStream); + + cantFail(C.takeError()); + + if (Status == MCDisassembler::Fail) + return MCDisassembler::Fail; + } + KdStream << ".end_amdhsa_kernel\n"; + outs() << KdStream.str(); + return MCDisassembler::Success; +} + +Optional +AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, uint64_t Address, + raw_ostream &CStream) const { + // Right now only kernel descriptor needs to be handled. + // We ignore all other symbols for target specific handling. + // TODO: + // Fix the spurious symbol issue for AMDGPU kernels. 
Exists for both Code + // Object V2 and V3 when symbols are marked protected. + + // amd_kernel_code_t for Code Object V2. + if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) { + Size = 256; + return MCDisassembler::Fail; + } + + // Code Object V3 kernel descriptors. + StringRef Name = Symbol.Name; + if (Symbol.Type == ELF::STT_OBJECT && Name.endswith(StringRef(".kd"))) { + Size = 64; // Size = 64 regardless of success or failure. + return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address); + } + return None; +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index f975af409a096c..315602c35288c2 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -17,10 +17,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/DataExtractor.h" #include #include @@ -66,6 +67,33 @@ class AMDGPUDisassembler : public MCDisassembler { DecodeStatus tryDecodeInst(const uint8_t* Table, MCInst &MI, uint64_t Inst, uint64_t Address) const; + Optional onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, + ArrayRef Bytes, + uint64_t Address, + raw_ostream &CStream) const override; + + DecodeStatus decodeKernelDescriptor(StringRef KdName, ArrayRef Bytes, + uint64_t KdAddress) const; + + DecodeStatus + decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, + ArrayRef Bytes, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC1. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC1. + /// \param KdStream - Stream to write the disassembled directives to. + // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + + /// Decode as directives that handle COMPUTE_PGM_RSRC2. + /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC2. + /// \param KdStream - Stream to write the disassembled directives to. 
+ // NOLINTNEXTLINE(readability-identifier-naming) + DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, + raw_string_ostream &KdStream) const; + DecodeStatus convertSDWAInst(MCInst &MI) const; DecodeStatus convertDPP8Inst(MCInst &MI) const; DecodeStatus convertMIMGInst(MCInst &MI) const; diff --git a/llvm/test/CodeGen/AMDGPU/nop-data.ll b/llvm/test/CodeGen/AMDGPU/nop-data.ll index 7b6853acce2854..e21ca97e8ffca1 100644 --- a/llvm/test/CodeGen/AMDGPU/nop-data.ll +++ b/llvm/test/CodeGen/AMDGPU/nop-data.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=fiji -filetype=obj < %s | llvm-objdump -d - --mcpu=fiji | FileCheck %s ; CHECK: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel0() align 256 { entry: ret void @@ -80,7 +80,7 @@ entry: ; CHECK-EMPTY: ; CHECK-NEXT: : -; CHECK-NEXT: s_endpgm +; CHECK: s_endpgm define amdgpu_kernel void @kernel1(i32 addrspace(1)* addrspace(4)* %ptr.out) align 256 { entry: ret void diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s new file mode 100644 index 00000000000000..eee3fd4b7103e3 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-failure.s @@ -0,0 +1,37 @@ +;; Failure test. We create a malformed kernel descriptor (KD) by manually +;; setting the bytes, because one can't create a malformed KD using the +;; assembler directives. + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t.o + +; RUN: printf ".type my_kernel.kd, @object \nmy_kernel.kd:\n.size my_kernel.kd, 64\n" > %t1.sym_info +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t.o \ +; RUN: | tail -n +9 > %t1.sym_content +; RUN: cat %t1.sym_info %t1.sym_content > %t1.s + +; RUN: llvm-mc %t1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t-re-assemble.o +; RUN: diff %t.o %t-re-assemble.o + +;; Test failure by setting one of the reserved bytes to non-zero value. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x00FF000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. + .long 0x0000000 + .short 0x0000 diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s new file mode 100644 index 00000000000000..0b798a298d398e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-sgpr.s @@ -0,0 +1,49 @@ +;; Test disassembly for GRANULATED_WAVEFRONT_SGPR_COUNT in the kernel descriptor. 
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + + +;--- 1.s +;; Only set next_free_sgpr. +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 42 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_reserve_vcc 0 +.end_amdhsa_kernel + +;--- 2.s +;; Only set other directives. +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel + +;--- 3.s +;; Set all affecting directives. +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 35 + .amdhsa_reserve_flat_scratch 1 + .amdhsa_reserve_xnack_mask 1 + .amdhsa_reserve_vcc 1 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s new file mode 100644 index 00000000000000..a8883d2f74be70 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-vgpr.s @@ -0,0 +1,36 @@ +;; Test disassembly for GRANULATED_WORKITEM_VGPR_COUNT in the kernel descriptor. 
+ +; RUN: split-file %s %t.dir + +; RUN: llvm-mc %t.dir/1.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_1.kd %t1 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1-re-assemble +; RUN: diff %t1 %t1-re-assemble + +; RUN: llvm-mc %t.dir/2.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_2.kd %t2 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2-re-assemble +; RUN: diff %t2 %t2-re-assemble + +; RUN: llvm-mc %t.dir/3.s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3 +; RUN: llvm-objdump --disassemble-symbols=my_kernel_3.kd %t3 | tail -n +8 \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t3-re-assemble +; RUN: diff %t3 %t3-re-assemble + +;--- 1.s +.amdhsa_kernel my_kernel_1 + .amdhsa_next_free_vgpr 23 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 2.s +.amdhsa_kernel my_kernel_2 + .amdhsa_next_free_vgpr 14 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +;--- 3.s +.amdhsa_kernel my_kernel_3 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s new file mode 100644 index 00000000000000..803507a130c03e --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s @@ -0,0 +1,58 @@ +;; Entirely zeroed kernel descriptor (for GFX10). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj -o %t +; RUN: llvm-objdump -s -j .text %t | FileCheck --check-prefix=OBJDUMP %s + +;; TODO: +;; This file and kd-zeroed-raw.s should produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. But looks like the assembler sets +;; the FWD_PROGRESS bit in COMPUTE_PGM_RSRC1 to 1 even when the directive +;; mentions 0 (see line 36). + +;; Check the raw bytes right now. 
+ +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 01000000 00000000 00000000 00000000 + +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 8 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 8 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_workgroup_processor_mode 0 + .amdhsa_memory_ordered 0 + .amdhsa_forward_progress 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 + .amdhsa_wavefront_size32 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s new file mode 100644 index 00000000000000..de4fdf74d88e09 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx9.s @@ -0,0 +1,53 @@ +;; Entirely zeroed kernel descriptor (for GFX9). + +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: diff %t1 %t2 + +; RUN: llvm-objdump -s -j .text %t1 | FileCheck --check-prefix=OBJDUMP %s + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; This file and kd-zeroed-raw.s produce the same output for the kernel +;; descriptor - a block of 64 zeroed bytes. 
+ +.amdhsa_kernel my_kernel + .amdhsa_group_segment_fixed_size 0 + .amdhsa_private_segment_fixed_size 0 + .amdhsa_next_free_vgpr 0 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_float_round_mode_32 0 + .amdhsa_float_round_mode_16_64 0 + .amdhsa_float_denorm_mode_32 0 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 0 + .amdhsa_system_sgpr_private_segment_wavefront_offset 0 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 0 + .amdhsa_system_sgpr_workgroup_id_z 0 + .amdhsa_system_sgpr_workgroup_info 0 + .amdhsa_system_vgpr_workitem_id 0 + .amdhsa_exception_fp_ieee_invalid_op 0 + .amdhsa_exception_fp_denorm_src 0 + .amdhsa_exception_fp_ieee_div_zero 0 + .amdhsa_exception_fp_ieee_overflow 0 + .amdhsa_exception_fp_ieee_underflow 0 + .amdhsa_exception_fp_ieee_inexact 0 + .amdhsa_exception_int_div_zero 0 + .amdhsa_user_sgpr_private_segment_buffer 0 + .amdhsa_user_sgpr_dispatch_ptr 0 + .amdhsa_user_sgpr_queue_ptr 0 + .amdhsa_user_sgpr_kernarg_segment_ptr 0 + .amdhsa_user_sgpr_dispatch_id 0 + .amdhsa_user_sgpr_flat_scratch_init 0 + .amdhsa_user_sgpr_private_segment_size 0 +.end_amdhsa_kernel diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s new file mode 100644 index 00000000000000..85554209d5d8fb --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-raw.s @@ -0,0 +1,41 @@ +; RUN: llvm-mc %s -mattr=+code-object-v3 --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t1 +; RUN: llvm-objdump --disassemble-symbols=my_kernel.kd %t1 \ +; RUN: | tail -n +8 | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -o %t2 +; RUN: llvm-objdump -s -j .text %t2 | FileCheck --check-prefix=OBJDUMP %s + +;; Not running lit-test over gfx10 (see kd-zeroed-gfx10.s for details). +;; kd-zeroed-raw.s and kd-zeroed-*.s should produce the same output for the +;; kernel descriptor - a block of 64 zeroed bytes. + +;; The disassembly will produce the contents of kd-zeroed-*.s which on being +;; assembled contains additional relocation info. A diff over the entire object +;; will fail in this case. So we check by looking the bytes in .text. + +; OBJDUMP: 0000 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +; OBJDUMP-NEXT: 0030 00000000 00000000 00000000 00000000 + +;; The entire object is zeroed out. + +.type my_kernel.kd, @object +.size my_kernel.kd, 64 +my_kernel.kd: + .long 0x00000000 ;; group_segment_fixed_size + .long 0x00000000 ;; private_segment_fixed_size + .quad 0x0000000000000000 ;; reserved bytes. + .quad 0x0000000000000000 ;; kernel_code_entry_byte_offset, any value works. + + ;; 20 reserved bytes. + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .long 0x00000000 + + .long 0x00000000 ;; compute_PGM_RSRC3 + .long 0x00000000 ;; compute_PGM_RSRC1 + .long 0x00000000 ;; compute_PGM_RSRC2 + .short 0x0000 ;; additional fields. + + ;; 6 reserved bytes. 
+ .long 0x0000000 + .short 0x0000 diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b63d08b90ff51a..46ed7414dbb31e 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1854,23 +1854,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, outs() << SectionName << ":\n"; } - if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { - if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) - Start += 256; - } - if (SI == SE - 1 || - Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { - // cut trailing zeroes at the end of kernel - // cut up to 256 bytes - const uint64_t EndAlign = 256; - const auto Limit = End - (std::min)(EndAlign, End - Start); - while (End > Limit && - *reinterpret_cast(&Bytes[End - 4]) == 0) - End -= 4; - } - } - outs() << '\n'; if (!NoLeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", From 71133e8b5bceaf68a2cee59af371df570a1aed79 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Tue, 8 Sep 2020 09:20:06 -0700 Subject: [PATCH 088/161] [clang-tidy] Fix linking for FrontendOpenMP Without this, builds with `-DBUILD_SHARED_LIBS=ON` fail. --- clang-tools-extra/clang-tidy/altera/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt index 878e718c659637..ed28d9f4892d28 100644 --- a/clang-tools-extra/clang-tidy/altera/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/altera/CMakeLists.txt @@ -1,4 +1,7 @@ -set(LLVM_LINK_COMPONENTS support) +set(LLVM_LINK_COMPONENTS + FrontendOpenMP + support + ) add_clang_library(clangTidyAlteraModule AlteraTidyModule.cpp From e2394245eb28695d5eed5d7c015e99141993c723 Mon Sep 17 00:00:00 2001 From: Lubomir Litchev Date: Thu, 3 Sep 2020 13:15:39 -0700 Subject: [PATCH 089/161] Add an option for unrolling loops up to a factor. Currently, there is no option to allow for unrolling a loop up to a specific factor (specified by the user). The code for doing that is there and there are benefits when unrolling is done to smaller loops (smaller than the factor specified). Reviewed By: bondhugula Differential Revision: https://reviews.llvm.org/D87111 --- mlir/include/mlir/Dialect/Affine/Passes.h | 3 ++- mlir/include/mlir/Dialect/Affine/Passes.td | 2 ++ .../Dialect/Affine/Transforms/LoopUnroll.cpp | 14 +++++++----- mlir/lib/Transforms/Utils/LoopUtils.cpp | 1 - mlir/test/Dialect/SCF/loop-unroll.mlir | 22 +++++++++++++++++++ .../test/lib/Transforms/TestLoopUnrolling.cpp | 3 +++ 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index db1c3bfead94f7..580fbf53ae4f26 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -61,7 +61,8 @@ std::unique_ptr> createLoopTilingPass(); /// and no callback is provided, anything passed from the command-line (if at /// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor). std::unique_ptr> createLoopUnrollPass( - int unrollFactor = -1, bool unrollFull = false, + int unrollFactor = -1, bool unrollUpToFactor = false, + bool unrollFull = false, const std::function &getUnrollFactor = nullptr); /// Creates a loop unroll jam pass to unroll jam by the specified factor. 
A diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 0e7f3e43661eff..7515dbaa33d863 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -71,6 +71,8 @@ def AffineLoopUnroll : FunctionPass<"affine-loop-unroll"> { let options = [ Option<"unrollFactor", "unroll-factor", "unsigned", /*default=*/"4", "Use this unroll factor for all loops being unrolled">, + Option<"unrollUpToFactor", "unroll-up-to-factor", "bool", /*default=*/"false", + "Allow unroling up to the factor specicied">, Option<"unrollFull", "unroll-full", "bool", /*default=*/"false", "Fully unroll loops">, Option<"numRepetitions", "unroll-num-reps", "unsigned", /*default=*/"1", diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index edb21384080f49..3dc236f3c06865 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -9,7 +9,6 @@ // This file implements loop unrolling. // //===----------------------------------------------------------------------===// - #include "PassDetail.h" #include "mlir/Analysis/LoopAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -45,11 +44,13 @@ struct LoopUnroll : public AffineLoopUnrollBase { : AffineLoopUnrollBase(other), getUnrollFactor(other.getUnrollFactor) {} explicit LoopUnroll( - Optional unrollFactor = None, bool unrollFull = false, + Optional unrollFactor = None, bool unrollUpToFactor = false, + bool unrollFull = false, const std::function &getUnrollFactor = nullptr) : getUnrollFactor(getUnrollFactor) { if (unrollFactor) this->unrollFactor = *unrollFactor; + this->unrollUpToFactor = unrollUpToFactor; this->unrollFull = unrollFull; } @@ -126,13 +127,16 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) { if (unrollFull) return loopUnrollFull(forOp); // Otherwise, unroll by the given unroll factor. + if (unrollUpToFactor) { + return loopUnrollUpToFactor(forOp, unrollFactor); + } return loopUnrollByFactor(forOp, unrollFactor); } std::unique_ptr> mlir::createLoopUnrollPass( - int unrollFactor, bool unrollFull, + int unrollFactor, bool unrollUpToFactor, bool unrollFull, const std::function &getUnrollFactor) { return std::make_unique( - unrollFactor == -1 ? None : Optional(unrollFactor), unrollFull, - getUnrollFactor); + unrollFactor == -1 ? 
None : Optional(unrollFactor), + unrollUpToFactor, unrollFull, getUnrollFactor); } diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index db6a071367d6c4..7ae45171ddbd3e 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -469,7 +469,6 @@ LogicalResult mlir::loopUnrollFull(AffineForOp forOp) { LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp, uint64_t unrollFactor) { Optional mayBeConstantTripCount = getConstantTripCount(forOp); - if (mayBeConstantTripCount.hasValue() && mayBeConstantTripCount.getValue() < unrollFactor) return loopUnrollByFactor(forOp, mayBeConstantTripCount.getValue()); diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir index 775188bf0ed991..134daa303ed86f 100644 --- a/mlir/test/Dialect/SCF/loop-unroll.mlir +++ b/mlir/test/Dialect/SCF/loop-unroll.mlir @@ -2,6 +2,7 @@ // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2 +// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index, %arg3: memref) { @@ -248,3 +249,24 @@ func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref) { // UNROLL-BY-3-NEXT: } // UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref // UNROLL-BY-3-NEXT: return + + +// Test unroll-up-to functionality. +func @static_loop_unroll_up_to_factor(%arg0 : memref) { + %0 = constant 7.0 : f32 + %lb = constant 0 : index + %ub = constant 2 : index + affine.for %i0 = %lb to %ub { + store %0, %arg0[%i0] : memref + } + return +} +// UNROLL-UP-TO-LABEL: func @static_loop_unroll_up_to_factor +// UNROLL-UP-TO-SAME: %[[MEM:.*0]]: memref +// UNROLL-UP-TO-DAG: %[[C0:.*]] = constant 0 : index +// UNROLL-UP-TO-DAG: %[[C2:.*]] = constant 2 : index +// UNROLL-UP-TO-NEXT: %[[V0:.*]] = affine.apply {{.*}} +// UNROLL-UP-TO-NEXT: store %{{.*}}, %[[MEM]][%[[V0]]] : memref +// UNROLL-UP-TO-NEXT: %[[V1:.*]] = affine.apply {{.*}} +// UNROLL-UP-TO-NEXT: tore %{{.*}}, %[[MEM]][%[[V1]]] : memref +// UNROLL-UP-TO-NEXT: return diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp index 712fddb97028ed..396f08b2cba323 100644 --- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp +++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp @@ -55,6 +55,9 @@ class TestLoopUnrollingPass Option unrollFactor{*this, "unroll-factor", llvm::cl::desc("Loop unroll factor."), llvm::cl::init(1)}; + Option unrollUpToFactor{*this, "unroll-up-to-factor", + llvm::cl::desc("Loop unroll up to factor."), + llvm::cl::init(false)}; Option loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."), llvm::cl::init(0)}; }; From 3c83b967cf223ce6a2e0813e48b64f7689512f20 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 17:21:28 +0100 Subject: [PATCH 090/161] LiveRegUnits.h - reduce MachineRegisterInfo.h include. NFC. We only need to include MachineInstrBundle.h, but exposes an implicit dependency in MachineOutliner.h. Also, remove duplicate includes from LiveRegUnits.cpp + MachineOutliner.cpp. 
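As a minimal sketch of the implicit-dependency problem described above (generic headers invented purely for illustration, not the actual LLVM files): once a header stops forwarding an include it no longer needs, every client that silently relied on that forwarding has to state the include itself.

```cpp
// a.h -- after the cleanup it includes only what it uses itself.
#pragma once
#include <vector>
inline std::vector<int> makeVec() { return {1, 2, 3}; }

// b.h -- before the cleanup this compiled only because a.h happened to pull
// in <string> transitively; now it must name the dependency explicitly.
#pragma once
#include "a.h"
#include <string>
inline std::string describe() {
  return "vec of size " + std::to_string(makeVec().size());
}
```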
--- llvm/include/llvm/CodeGen/LiveRegUnits.h | 2 +- llvm/include/llvm/CodeGen/MachineOutliner.h | 3 ++- llvm/lib/CodeGen/LiveRegUnits.cpp | 4 ---- llvm/lib/CodeGen/MachineOutliner.cpp | 2 -- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h index 1ed091e3bb5e9d..e20e04cad35cc8 100644 --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -15,7 +15,7 @@ #define LLVM_CODEGEN_LIVEREGUNITS_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index 4a1b04ab3e8866..a5dbbdb4fdcd22 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -15,10 +15,11 @@ #ifndef LLVM_MACHINEOUTLINER_H #define LLVM_MACHINEOUTLINER_H +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/LivePhysRegs.h" namespace llvm { namespace outliner { diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index b2731aa0e7dbca..ea2075bc139dfd 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -11,15 +11,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveRegUnits.h" - #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/MC/MCRegisterInfo.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index f9d099e029956d..715a2ba4667d23 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -59,10 +59,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" From d25c17f3175b344420c1f30040b206a47a512c9d Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Sun, 6 Sep 2020 10:36:07 -0700 Subject: [PATCH 091/161] [WebAssembly] Fix fixEndsAtEndOfFunction for try-catch When the function return type is non-void and `end` instructions are at the very end of a function, CFGStackify's `fixEndsAtEndOfFunction` function fixes the corresponding block/loop/try's type to match the function's return type. This is applied to consecutive `end` markers at the end of a function. For example, when the function return type is `i32`, ``` block i32 ;; return type is fixed to i32 ... loop i32 ;; return type is fixed to i32 ... 
end_loop end_block end_function ``` But try-catch is a little different, because it consists of two parts: a try part and a catch part, and both parts' return type should satisfy the function's return type. Which means, ``` try i32 ;; return type is fixed to i32 ... block i32 ;; this should be changed i32 too! ... end_block catch ... end_try end_function ``` As you can see in this example, it is not sufficient to only `end` instructions at the end of a function; in case of `try`, we should check instructions before `catch`es, in case their corresponding `try`'s type has been fixed. This changes `fixEndsAtEndOfFunction`'s algorithm to use a worklist that contains a reverse iterator, each of which is a starting point for a new backward `end` instruction search. Fixes https://bugs.llvm.org/show_bug.cgi?id=47413. Reviewed By: dschuff, tlively Differential Revision: https://reviews.llvm.org/D87207 --- .../WebAssembly/WebAssemblyCFGStackify.cpp | 72 ++++++++++++------- .../CodeGen/WebAssembly/cfg-stackify-eh.ll | 48 +++++++++++++ 2 files changed, 96 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 02330a2dd4afae..d5ee4b3b9440e5 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -178,6 +178,28 @@ getLatestInsertPos(MachineBasicBlock *MBB, return InsertPos; } +// Find a catch instruction and its destination register within an EH pad. +static MachineInstr *findCatch(MachineBasicBlock *EHPad, Register &ExnReg) { + assert(EHPad->isEHPad()); + MachineInstr *Catch = nullptr; + for (auto &MI : *EHPad) { + switch (MI.getOpcode()) { + case WebAssembly::CATCH: + Catch = &MI; + ExnReg = Catch->getOperand(0).getReg(); + break; + } + } + assert(Catch && "EH pad does not have a catch"); + assert(ExnReg != 0 && "Invalid register"); + return Catch; +} + +static MachineInstr *findCatch(MachineBasicBlock *EHPad) { + Register Dummy; + return findCatch(EHPad, Dummy); +} + void WebAssemblyCFGStackify::registerScope(MachineInstr *Begin, MachineInstr *End) { BeginToEnd[Begin] = End; @@ -1101,25 +1123,8 @@ bool WebAssemblyCFGStackify::fixUnwindMismatches(MachineFunction &MF) { continue; MachineBasicBlock *EHPad = P.first; - - // Find 'catch' and 'local.set' or 'drop' instruction that follows the - // 'catch'. If -wasm-disable-explicit-locals is not set, 'catch' should be - // always followed by either 'local.set' or a 'drop', because 'br_on_exn' is - // generated after 'catch' in LateEHPrepare and we don't support blocks - // taking values yet. 
- MachineInstr *Catch = nullptr; - unsigned ExnReg = 0; - for (auto &MI : *EHPad) { - switch (MI.getOpcode()) { - case WebAssembly::CATCH: - Catch = &MI; - ExnReg = Catch->getOperand(0).getReg(); - break; - } - } - assert(Catch && "EH pad does not have a catch"); - assert(ExnReg != 0 && "Invalid register"); - + Register ExnReg = 0; + MachineInstr *Catch = findCatch(EHPad, ExnReg); auto SplitPos = std::next(Catch->getIterator()); // Create a new BB that's gonna be the destination for branches from the @@ -1371,22 +1376,41 @@ void WebAssemblyCFGStackify::fixEndsAtEndOfFunction(MachineFunction &MF) { : WebAssembly::BlockType( WebAssembly::toValType(MFI.getResults().front())); - for (MachineBasicBlock &MBB : reverse(MF)) { - for (MachineInstr &MI : reverse(MBB)) { + SmallVector Worklist; + Worklist.push_back(MF.rbegin()->rbegin()); + + auto Process = [&](MachineBasicBlock::reverse_iterator It) { + auto *MBB = It->getParent(); + while (It != MBB->rend()) { + MachineInstr &MI = *It++; if (MI.isPosition() || MI.isDebugInstr()) continue; switch (MI.getOpcode()) { + case WebAssembly::END_TRY: { + // If a 'try''s return type is fixed, both its try body and catch body + // should satisfy the return type, so we need to search 'end' + // instructions before its corresponding 'catch' too. + auto *EHPad = TryToEHPad.lookup(EndToBegin[&MI]); + assert(EHPad); + Worklist.push_back(std::next(findCatch(EHPad)->getReverseIterator())); + LLVM_FALLTHROUGH; + } case WebAssembly::END_BLOCK: case WebAssembly::END_LOOP: - case WebAssembly::END_TRY: EndToBegin[&MI]->getOperand(0).setImm(int32_t(RetType)); continue; default: - // Something other than an `end`. We're done. + // Something other than an `end`. We're done for this BB. return; } } - } + // We've reached the beginning of a BB. Continue the search in the previous + // BB. + Worklist.push_back(MBB->getPrevNode()->rbegin()); + }; + + while (!Worklist.empty()) + Process(Worklist.pop_back_val()); } // WebAssembly functions end with an end instruction, as if the function body diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll index 887dc470b3bc8b..f78d56ca0b9620 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh.ll @@ -1023,6 +1023,54 @@ while.end: ; preds = %while.body, %while. ret void } +; When the function return type is non-void and 'end' instructions are at the +; very end of a function, CFGStackify's fixEndsAtEndOfFunction function fixes +; the corresponding block/loop/try's type to match the function's return type. +; But when a `try`'s type is fixed, we should also check `end` instructions +; before its corresponding `catch`, because both `try` and `catch` body should +; satisfy the return type requirements. 
+ +; NOSORT-LABEL: test19 +; NOSORT: try i32 +; NOSORT: loop i32 +; NOSORT: end_loop +; NOSORT: catch +; NOSORT: end_try +; NOSORT-NEXT: end_function +define i32 @test19(i32 %n) personality i8* bitcast (i32 (...)* @__gxx_wasm_personality_v0 to i8*) { +entry: + %t = alloca %class.Object, align 1 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %n + br label %for.body + +for.body: ; preds = %for.cond + %div = sdiv i32 %n, 2 + %cmp1 = icmp eq i32 %i.0, %div + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %call = invoke i32 @baz() + to label %invoke.cont unwind label %ehcleanup + +invoke.cont: ; preds = %if.then + %call2 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 + ret i32 %call + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +ehcleanup: ; preds = %if.then + %0 = cleanuppad within none [] + %call3 = call %class.Object* @_ZN6ObjectD2Ev(%class.Object* %t) #4 [ "funclet"(token %0) ] + cleanupret from %0 unwind to caller +} + + ; Check if the unwind destination mismatch stats are correct ; NOSORT-STAT: 17 wasm-cfg-stackify - Number of EH pad unwind mismatches found From 1242dd330d9054a57c1403f16d5487f9e3a3a92f Mon Sep 17 00:00:00 2001 From: Volkan Keles Date: Tue, 8 Sep 2020 09:46:38 -0700 Subject: [PATCH 092/161] GlobalISel: Combine `op undef, x` to 0 https://reviews.llvm.org/D86611 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 ++ .../include/llvm/Target/GlobalISel/Combine.td | 7 +++++ .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 6 ++++ .../AArch64/GlobalISel/combine-shl.mir | 29 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 8607ad02d50637..cff6b496cca279 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -321,6 +321,9 @@ class CombinerHelper { /// Check if operand \p OpIdx is zero. bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx); + /// Check if operand \p OpIdx is undef. + bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx); + /// Erase \p MI bool eraseInst(MachineInstr &MI); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 6a6f97ae78b04d..5b940551dad595 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -194,6 +194,12 @@ def undef_to_negative_one: GICombineRule< [{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]), (apply [{ Helper.replaceInstWithConstant(*${root}, -1); }])>; +def binop_left_undef_to_zero: GICombineRule< + (defs root:$root), + (match (wip_match_opcode G_SHL):$root, + [{ return Helper.matchOperandIsUndef(*${root}, 1); }]), + (apply [{ Helper.replaceInstWithConstant(*${root}, 0); }])>; + // Instructions where if any source operand is undef, the instruction can be // replaced with undef. def propagate_undef_any_op: GICombineRule< @@ -384,6 +390,7 @@ def not_cmp_fold : GICombineRule< // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, + binop_left_undef_to_zero, propagate_undef_any_op, propagate_undef_all_ops, propagate_undef_shuffle_mask, diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 10cd58f17e9aaa..d58ba7cf5a8c65 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1989,6 +1989,12 @@ bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) { MRI); } +bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + return MO.isReg() && + getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI); +} + bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) { assert(MI.getNumDefs() == 1 && "Expected only one def?"); Builder.setInstr(MI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir new file mode 100644 index 00000000000000..fe75f9965bc908 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shl.mir @@ -0,0 +1,29 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s +--- +name: test_combine_shl_undef_x_s32 +body: | + bb.1: + liveins: $w0 + ; CHECK-LABEL: name: test_combine_shl_undef_x_s32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: $w0 = COPY [[C]](s32) + %0:_(s32) = COPY $w0 + %1:_(s32) = G_IMPLICIT_DEF + %2:_(s32) = G_SHL %1(s32), %0(s32) + $w0 = COPY %2(s32) +... +--- +name: test_combine_shl_undef_x_v2s32 +body: | + bb.1: + liveins: $d0 + ; CHECK-LABEL: name: test_combine_shl_undef_x_v2s32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + %0:_(<2 x s32>) = COPY $d0 + %1:_(<2 x s32>) = G_IMPLICIT_DEF + %2:_(<2 x s32>) = G_SHL %1(<2 x s32>), %0(<2 x s32>) + $d0 = COPY %2(<2 x s32>) +... From 514df1b2bb1ecd1a33327001ea38a347fd2d0380 Mon Sep 17 00:00:00 2001 From: Ties Stuij Date: Fri, 28 Aug 2020 15:08:02 +0100 Subject: [PATCH 093/161] [ARM] Follow AACPS standard for volatile bit-fields access width This patch resumes the work of D16586. According to the AAPCS, volatile bit-fields should be accessed using containers of the widht of their declarative type. In such case: ``` struct S1 { short a : 1; } ``` should be accessed using load and stores of the width (sizeof(short)), where now the compiler does only load the minimum required width (char in this case). However, as discussed in D16586, that could overwrite non-volatile bit-fields, which conflicted with C and C++ object models by creating data race conditions that are not part of the bit-field, e.g. ``` struct S2 { short a; int b : 16; } ``` Accessing `S2.b` would also access `S2.a`. The AAPCS Release 2020Q2 (https://documentation-service.arm.com/static/5efb7fbedbdee951c1ccf186?token=) section 8.1 Data Types, page 36, "Volatile bit-fields - preserving number and width of container accesses" has been updated to avoid conflict with the C++ Memory Model. 
Now it reads in the note: ``` This ABI does not place any restrictions on the access widths of bit-fields where the container overlaps with a non-bit-field member or where the container overlaps with any zero length bit-field placed between two other bit-fields. This is because the C/C++ memory model defines these as being separate memory locations, which can be accessed by two threads simultaneously. For this reason, compilers must be permitted to use a narrower memory access width (including splitting the access into multiple instructions) to avoid writing to a different memory location. For example, in struct S { int a:24; char b; }; a write to a must not also write to the location occupied by b, this requires at least two memory accesses in all current Arm architectures. In the same way, in struct S { int a:24; int:0; int b:8; };, writes to a or b must not overwrite each other. ``` Patch D16586 was updated to follow such behavior by verifying that we only change volatile bit-field access when: - it won't overlap with any other non-bit-field member - we only access memory inside the bounds of the record - avoid overlapping zero-length bit-fields. Regarding the number of memory accesses, that should be preserved, that will be implemented by D67399. Differential Revision: https://reviews.llvm.org/D72932 The following people contributed to this patch: - Diogo Sampaio - Ties Stuij --- clang/include/clang/Basic/CodeGenOptions.def | 6 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/CodeGen/CGExpr.cpp | 118 +- clang/lib/CodeGen/CGRecordLayout.h | 17 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 166 +- clang/lib/Frontend/CompilerInvocation.cpp | 3 + clang/test/CodeGen/aapcs-bitfield.c | 3292 +++++++++++++++++- clang/test/CodeGen/bitfield-2.c | 12 +- 8 files changed, 3519 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index ec77f68062e7a1..f2f29db2334e44 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -392,9 +392,13 @@ CODEGENOPT(Addrsig, 1, 0) /// Whether to emit unused static constants. CODEGENOPT(KeepStaticConsts, 1, 0) -/// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield +/// Whether to follow the AAPCS enforcing at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) +/// Whether to not follow the AAPCS that enforces volatile bit-field access width to be +/// according to the field declaring type width. +CODEGENOPT(AAPCSBitfieldWidth, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4ba5d40117e77c..81d63330b4279b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2363,9 +2363,15 @@ def mno_neg_immediates: Flag<["-"], "mno-neg-immediates">, Group, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Allow use of CMSE (Armv8-M Security Extensions)">; -def ForceAAPCSBitfieldLoad : Flag<["-"], "fAAPCSBitfieldLoad">, Group, +def ForceAAPCSBitfieldLoad : Flag<["-"], "faapcs-bitfield-load">, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Follows the AAPCS standard that all volatile bit-field write generates at least one load. 
(ARM only).">; +def ForceNoAAPCSBitfieldWidth : Flag<["-"], "fno-aapcs-bitfield-width">, Group, + Flags<[DriverOption,CC1Option]>, + HelpText<"Do not follow the AAPCS standard requirement that volatile bit-field width is dictated by the field container type. (ARM only).">; +def AAPCSBitfieldWidth : Flag<["-"], "faapcs-bitfield-width">, Group, + Flags<[DriverOption,CC1Option]>, + HelpText<"Follow the AAPCS standard requirement stating that volatile bit-field width is dictated by the field container type. (ARM only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 7351926035e64d..df024a84462dbd 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1927,22 +1927,27 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, llvm::Type *ResLTy = ConvertType(LV.getType()); Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); - + llvm::Value *Val = + Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + + bool UseVolatile = LV.isVolatileQualified() && + Info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); + const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; + const unsigned StorageSize = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; if (Info.IsSigned) { - assert(static_cast(Info.Offset + Info.Size) <= Info.StorageSize); - unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; + assert(static_cast(Offset + Info.Size) <= StorageSize); + unsigned HighBits = StorageSize - Offset - Info.Size; if (HighBits) Val = Builder.CreateShl(Val, HighBits, "bf.shl"); - if (Info.Offset + HighBits) - Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr"); + if (Offset + HighBits) + Val = Builder.CreateAShr(Val, Offset + HighBits, "bf.ashr"); } else { - if (Info.Offset) - Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); - if (static_cast(Info.Offset) + Info.Size < Info.StorageSize) - Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, - Info.Size), - "bf.clear"); + if (Offset) + Val = Builder.CreateLShr(Val, Offset, "bf.lshr"); + if (static_cast(Offset) + Info.Size < StorageSize) + Val = Builder.CreateAnd( + Val, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); EmitScalarRangeCheck(Val, LV.getType(), Loc); @@ -2144,39 +2149,43 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; + const bool UseVolatile = CGM.getCodeGenOpts().AAPCSBitfieldWidth && + Dst.isVolatileQualified() && + Info.VolatileStorageSize != 0 && + isAAPCS(CGM.getTarget()); + const unsigned StorageSize = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; + const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; // See if there are other bits in the bitfield's storage we'll need to load // and mask together with source before storing. 
- if (Info.StorageSize != Info.Size) { - assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); + if (StorageSize != Info.Size) { + assert(StorageSize > Info.Size && "Invalid bitfield size."); llvm::Value *Val = - Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); + Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. if (!hasBooleanRepresentation(Dst.getType())) - SrcVal = Builder.CreateAnd(SrcVal, - llvm::APInt::getLowBitsSet(Info.StorageSize, - Info.Size), - "bf.value"); + SrcVal = Builder.CreateAnd( + SrcVal, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), + "bf.value"); MaskedVal = SrcVal; - if (Info.Offset) - SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl"); + if (Offset) + SrcVal = Builder.CreateShl(SrcVal, Offset, "bf.shl"); // Mask out the original value. - Val = Builder.CreateAnd(Val, - ~llvm::APInt::getBitsSet(Info.StorageSize, - Info.Offset, - Info.Offset + Info.Size), - "bf.clear"); + Val = Builder.CreateAnd( + Val, ~llvm::APInt::getBitsSet(StorageSize, Offset, Offset + Info.Size), + "bf.clear"); // Or together the unchanged values and the source value. SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set"); } else { - assert(Info.Offset == 0); + assert(Offset == 0); // According to the AACPS: // When a volatile bit-field is written, and its container does not overlap - // with any non-bit-field member, its container must be read exactly once and - // written exactly once using the access width appropriate to the type of the - // container. The two accesses are not atomic. + // with any non-bit-field member, its container must be read exactly once + // and written exactly once using the access width appropriate to the type + // of the container. The two accesses are not atomic. if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) && CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad) Builder.CreateLoad(Ptr, true, "bf.load"); @@ -2191,8 +2200,8 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Sign extend the value if needed. if (Info.IsSigned) { - assert(Info.Size <= Info.StorageSize); - unsigned HighBits = Info.StorageSize - Info.Size; + assert(Info.Size <= StorageSize); + unsigned HighBits = StorageSize - Info.Size; if (HighBits) { ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl"); ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr"); @@ -4204,32 +4213,45 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (field->isBitField()) { const CGRecordLayout &RL = - CGM.getTypes().getCGRecordLayout(field->getParent()); + CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); + const bool UseVolatile = isAAPCS(CGM.getTarget()) && + CGM.getCodeGenOpts().AAPCSBitfieldWidth && + Info.VolatileStorageSize != 0 && + field->getType() + .withCVRQualifiers(base.getVRQualifiers()) + .isVolatileQualified(); Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); - if (!IsInPreservedAIRegion && - (!getDebugInfo() || !rec->hasAttr())) { - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. 
- Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); - } else { - llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( - getContext().getRecordType(rec), rec->getLocation()); - Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, - getDebugInfoFIndex(rec, field->getFieldIndex()), - DbgInfo); + if (!UseVolatile) { + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr())) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex( + Addr, Idx, getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); + } } - + const unsigned SS = + UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; // Get the access type. - llvm::Type *FieldIntTy = - llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); + llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS); if (Addr.getElementType() != FieldIntTy) Addr = Builder.CreateElementBitCast(Addr, FieldIntTy); + if (UseVolatile) { + const unsigned VolatileOffset = Info.VolatileStorageOffset.getQuantity(); + if (VolatileOffset) + Addr = Builder.CreateConstInBoundsGEP(Addr, VolatileOffset); + } QualType fieldType = - field->getType().withCVRQualifiers(base.getVRQualifiers()); + field->getType().withCVRQualifiers(base.getVRQualifiers()); // TODO: Support TBAA for bit fields. LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h index 730ee4c438e7e0..e6665b72bcba15 100644 --- a/clang/lib/CodeGen/CGRecordLayout.h +++ b/clang/lib/CodeGen/CGRecordLayout.h @@ -46,7 +46,7 @@ namespace CodeGen { /// }; /// /// This will end up as the following LLVM type. The first array is the -/// bitfield, and the second is the padding out to a 4-byte alignmnet. +/// bitfield, and the second is the padding out to a 4-byte alignment. /// /// %t = type { i8, i8, i8, i8, i8, [3 x i8] } /// @@ -80,8 +80,21 @@ struct CGBitFieldInfo { /// The offset of the bitfield storage from the start of the struct. CharUnits StorageOffset; + /// The offset within a contiguous run of bitfields that are represented as a + /// single "field" within the LLVM struct type, taking into account the AAPCS + /// rules for volatile bitfields. This offset is in bits. + unsigned VolatileOffset : 16; + + /// The storage size in bits which should be used when accessing this + /// bitfield. + unsigned VolatileStorageSize; + + /// The offset of the bitfield storage from the start of the struct. + CharUnits VolatileStorageOffset; + CGBitFieldInfo() - : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset() {} + : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(), + VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {} CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, unsigned StorageSize, CharUnits StorageOffset) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 4e5d1d3f16f65b..ce35880106c20f 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -109,6 +109,14 @@ struct CGRecordLowering { D->isMsStruct(Context); } + /// Helper function to check if we are targeting AAPCS. 
+ bool isAAPCS() const { + return Context.getTargetInfo().getABI().startswith("aapcs"); + } + + /// Helper function to check if the target machine is BigEndian. + bool isBE() const { return Context.getTargetInfo().isBigEndian(); } + /// The Itanium base layout rule allows virtual bases to overlap /// other bases, which complicates layout in specific ways. /// @@ -172,7 +180,8 @@ struct CGRecordLowering { void lowerUnion(); void accumulateFields(); void accumulateBitFields(RecordDecl::field_iterator Field, - RecordDecl::field_iterator FieldEnd); + RecordDecl::field_iterator FieldEnd); + void computeVolatileBitfields(); void accumulateBases(); void accumulateVPtrs(); void accumulateVBases(); @@ -237,6 +246,10 @@ void CGRecordLowering::setBitFieldInfo( // least-significant-bit. if (DataLayout.isBigEndian()) Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); + + Info.VolatileStorageSize = 0; + Info.VolatileOffset = 0; + Info.VolatileStorageOffset = CharUnits::Zero(); } void CGRecordLowering::lower(bool NVBaseType) { @@ -261,15 +274,21 @@ void CGRecordLowering::lower(bool NVBaseType) { // 8) Format the complete list of members in a way that can be consumed by // CodeGenTypes::ComputeRecordLayout. CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize(); - if (D->isUnion()) - return lowerUnion(); + if (D->isUnion()) { + lowerUnion(); + computeVolatileBitfields(); + return; + } accumulateFields(); // RD implies C++. if (RD) { accumulateVPtrs(); accumulateBases(); - if (Members.empty()) - return appendPaddingBytes(Size); + if (Members.empty()) { + appendPaddingBytes(Size); + computeVolatileBitfields(); + return; + } if (!NVBaseType) accumulateVBases(); } @@ -281,6 +300,7 @@ void CGRecordLowering::lower(bool NVBaseType) { Members.pop_back(); calculateZeroInit(); fillOutputFields(); + computeVolatileBitfields(); } void CGRecordLowering::lowerUnion() { @@ -418,9 +438,9 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || !DataLayout.fitsInLegalInteger(OffsetInRecord)) return false; - // Make sure StartBitOffset is natually aligned if it is treated as an + // Make sure StartBitOffset is naturally aligned if it is treated as an // IType integer. - if (StartBitOffset % + if (StartBitOffset % Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != 0) return false; @@ -503,6 +523,123 @@ void CGRecordLowering::accumulateBases() { } } +/// The AAPCS that defines that, when possible, bit-fields should +/// be accessed using containers of the declared type width: +/// When a volatile bit-field is read, and its container does not overlap with +/// any non-bit-field member or any zero length bit-field member, its container +/// must be read exactly once using the access width appropriate to the type of +/// the container. When a volatile bit-field is written, and its container does +/// not overlap with any non-bit-field member or any zero-length bit-field +/// member, its container must be read exactly once and written exactly once +/// using the access width appropriate to the type of the container. The two +/// accesses are not atomic. +/// +/// Enforcing the width restriction can be disabled using +/// -fno-aapcs-bitfield-width. 
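+/// For illustration, these are the two cases quoted from the AAPCS note in
+/// the commit message above: in
+///   struct S { int a : 24; char b; };
+/// an int-wide container for 'a' would overlap the non-bit-field 'b', and a
+/// write to 'a' must not also write 'b', so a narrower (or split) access is
+/// permitted; likewise, in
+///   struct S { int a : 24; int : 0; int b : 8; };
+/// writes to 'a' and 'b' must not overwrite each other, so no particular
+/// access width is imposed where the container would overlap the zero-length
+/// bit-field between them.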
+void CGRecordLowering::computeVolatileBitfields() { + if (!isAAPCS() || !Types.getCodeGenOpts().AAPCSBitfieldWidth) + return; + + for (auto &I : BitFields) { + const FieldDecl *Field = I.first; + CGBitFieldInfo &Info = I.second; + llvm::Type *ResLTy = Types.ConvertTypeForMem(Field->getType()); + // If the record alignment is less than the type width, we can't enforce a + // aligned load, bail out. + if ((uint64_t)(Context.toBits(Layout.getAlignment())) < + ResLTy->getPrimitiveSizeInBits()) + continue; + // CGRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets + // for big-endian targets, but it assumes a container of width + // Info.StorageSize. Since AAPCS uses a different container size (width + // of the type), we first undo that calculation here and redo it once + // the bit-field offset within the new container is calculated. + const unsigned OldOffset = + isBE() ? Info.StorageSize - (Info.Offset + Info.Size) : Info.Offset; + // Offset to the bit-field from the beginning of the struct. + const unsigned AbsoluteOffset = + Context.toBits(Info.StorageOffset) + OldOffset; + + // Container size is the width of the bit-field type. + const unsigned StorageSize = ResLTy->getPrimitiveSizeInBits(); + // Nothing to do if the access uses the desired + // container width and is naturally aligned. + if (Info.StorageSize == StorageSize && (OldOffset % StorageSize == 0)) + continue; + + // Offset within the container. + unsigned Offset = AbsoluteOffset & (StorageSize - 1); + // Bail out if an aligned load of the container cannot cover the entire + // bit-field. This can happen for example, if the bit-field is part of a + // packed struct. AAPCS does not define access rules for such cases, we let + // clang to follow its own rules. + if (Offset + Info.Size > StorageSize) + continue; + + // Re-adjust offsets for big-endian targets. + if (isBE()) + Offset = StorageSize - (Offset + Info.Size); + + const CharUnits StorageOffset = + Context.toCharUnitsFromBits(AbsoluteOffset & ~(StorageSize - 1)); + const CharUnits End = StorageOffset + + Context.toCharUnitsFromBits(StorageSize) - + CharUnits::One(); + + const ASTRecordLayout &Layout = + Context.getASTRecordLayout(Field->getParent()); + // If we access outside memory outside the record, than bail out. + const CharUnits RecordSize = Layout.getSize(); + if (End >= RecordSize) + continue; + + // Bail out if performing this load would access non-bit-fields members. + bool Conflict = false; + for (const auto *F : D->fields()) { + // Allow sized bit-fields overlaps. + if (F->isBitField() && !F->isZeroLengthBitField(Context)) + continue; + + const CharUnits FOffset = Context.toCharUnitsFromBits( + Layout.getFieldOffset(F->getFieldIndex())); + + // As C11 defines, a zero sized bit-field defines a barrier, so + // fields after and before it should be race condition free. + // The AAPCS acknowledges it and imposes no restritions when the + // natural container overlaps a zero-length bit-field. + if (F->isZeroLengthBitField(Context)) { + if (End > FOffset && StorageOffset < FOffset) { + Conflict = true; + break; + } + } + + const CharUnits FEnd = + FOffset + + Context.toCharUnitsFromBits( + Types.ConvertTypeForMem(F->getType())->getPrimitiveSizeInBits()) - + CharUnits::One(); + // If no overlap, continue. + if (End < FOffset || FEnd < StorageOffset) + continue; + + // The desired load overlaps a non-bit-field member, bail out. + Conflict = true; + break; + } + + if (Conflict) + continue; + // Write the new bit-field access parameters. 
+ // As the storage offset now is defined as the number of elements from the + // start of the structure, we should divide the Offset by the element size. + Info.VolatileStorageOffset = + StorageOffset / Context.toCharUnitsFromBits(StorageSize).getQuantity(); + Info.VolatileStorageSize = StorageSize; + Info.VolatileOffset = Offset; + } +} + void CGRecordLowering::accumulateVPtrs() { if (Layout.hasOwnVFPtr()) Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, @@ -848,8 +985,10 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { assert(Info.StorageSize <= SL->getSizeInBits() && "Union not large enough for bitfield storage"); } else { - assert(Info.StorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy) && + assert((Info.StorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy) || + Info.VolatileStorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy)) && "Storage size does not match the element type size"); } assert(Info.Size > 0 && "Empty bitfield!"); @@ -897,11 +1036,12 @@ LLVM_DUMP_METHOD void CGRecordLayout::dump() const { void CGBitFieldInfo::print(raw_ostream &OS) const { OS << ""; + << " StorageOffset:" << StorageOffset.getQuantity() + << " VolatileOffset:" << VolatileOffset + << " VolatileStorageSize:" << VolatileStorageSize + << " VolatileStorageOffset:" << VolatileStorageOffset.getQuantity() << ">"; } LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index fbccff11562c17..1fbeb458a9d23f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1453,6 +1453,9 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + Opts.AAPCSBitfieldWidth = Args.hasFlag(OPT_AAPCSBitfieldWidth, + OPT_ForceNoAAPCSBitfieldWidth, + true); return Success; } diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c index 4fc889bcf379ec..13db68d6ae81bd 100644 --- a/clang/test/CodeGen/aapcs-bitfield.c +++ b/clang/test/CodeGen/aapcs-bitfield.c @@ -1,8 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LE -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BE -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=LE,LENUMLOADS -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=BE,BENUMLOADS +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=LE +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=BE +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=LENUMLOADS +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=BENUMLOADS +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s 
-check-prefix=LEWIDTH +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BEWIDTH +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=LEWIDTHNUM +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=BEWIDTHNUM struct st0 { short c : 7; @@ -25,6 +29,57 @@ struct st0 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st0_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st0_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st0_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st0_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st0_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st0_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st0_check_load(struct st0 *m) { return m->c; } @@ -47,6 +102,60 @@ int st0_check_load(struct st0 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st0_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st0_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st0_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st0_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st0_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st0_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st0_check_store(struct st0 *m) { m->c = 1; } @@ -73,6 +182,57 @@ struct st1 { // BE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st1_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st1_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BENUMLOADS-NEXT: [[CONV:%.*]] = 
sext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st1_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st1_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st1_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st1_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st1_check_load(struct st1 *m) { return m->c; } @@ -95,6 +255,60 @@ int st1_check_load(struct st1 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st1_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st1_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st1_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st1_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], 
%struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st1_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st1_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void st1_check_store(struct st1 *m) { m->c = 1; } @@ -121,6 +335,57 @@ struct st2 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st2_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st2_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st2_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st2_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st2_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 
+// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st2_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st2_check_load(struct st2 *m) { return m->c; } @@ -143,6 +408,60 @@ int st2_check_load(struct st2 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st2_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st2_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st2_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st2_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st2_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st2_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st2_check_store(struct st2 *m) { m->c = 1; } @@ -168,6 +487,57 @@ struct st3 { // BE-NEXT: 
[[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st3_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st3_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st3_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st3_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st3_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st3_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st3_check_load(struct st3 *m) { return m->c; } @@ -190,6 +560,60 @@ int st3_check_load(struct st3 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st3_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st3_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* 
[[TMP0]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st3_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st3_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// BEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st3_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st3_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// BEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st3_check_store(struct st3 *m) { m->c = 1; } @@ -221,6 +645,68 @@ struct st4 { // BE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st4_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 +// LENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st4_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 +// BENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st4_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* 
[[M:%.*]] to i8* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st4_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st4_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st4_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st4_check_load(struct st4 *m) { return m->c; } @@ -243,6 +729,64 @@ int st4_check_load(struct st4 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st4_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st4_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4 +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st4_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// 
LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st4_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st4_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st4_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st4_check_store(struct st4 *m) { m->c = 1; } @@ -265,6 +809,60 @@ void st4_check_store(struct st4 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st4_check_nonv_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st4_check_nonv_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st4_check_nonv_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st4_check_nonv_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: 
[[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st4_check_nonv_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st4_check_nonv_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void st4_check_nonv_store(struct st4 *m) { m->b = 1; } @@ -291,6 +889,57 @@ struct st5 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // +// LENUMLOADS-LABEL: @st5_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[CONV]] +// +// BENUMLOADS-LABEL: @st5_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[CONV]] +// +// LEWIDTH-LABEL: @st5_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: ret i32 [[CONV]] +// +// BEWIDTH-LABEL: @st5_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: ret i32 [[CONV]] +// +// LEWIDTHNUM-LABEL: @st5_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 
+// LEWIDTHNUM-NEXT: ret i32 [[CONV]] +// +// BEWIDTHNUM-LABEL: @st5_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[CONV]] +// int st5_check_load(struct st5 *m) { return m->c; } @@ -313,6 +962,60 @@ int st5_check_load(struct st5 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st5_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st5_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st5_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st5_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st5_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st5_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 +// BEWIDTHNUM-NEXT: ret void +// void st5_check_store(struct st5 *m) { 
m->c = 1; } @@ -331,7 +1034,7 @@ struct st6 { // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 // LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -349,7 +1052,7 @@ struct st6 { // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -359,6 +1062,114 @@ struct st6 { // BE-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] // BE-NEXT: ret i32 [[ADD4]] // +// LENUMLOADS-LABEL: @st6_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LENUMLOADS-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LENUMLOADS-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LENUMLOADS-NEXT: ret i32 [[ADD5]] +// +// BENUMLOADS-LABEL: @st6_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BENUMLOADS-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BENUMLOADS-NEXT: 
[[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BENUMLOADS-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BENUMLOADS-NEXT: ret i32 [[ADD4]] +// +// LEWIDTH-LABEL: @st6_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LEWIDTH-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LEWIDTH-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LEWIDTH-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LEWIDTH-NEXT: ret i32 [[ADD5]] +// +// BEWIDTH-LABEL: @st6_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BEWIDTH-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BEWIDTH-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BEWIDTH-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BEWIDTH-NEXT: ret i32 [[ADD4]] +// +// LEWIDTHNUM-LABEL: @st6_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 +// LEWIDTHNUM-NEXT: 
[[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 +// LEWIDTHNUM-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] +// LEWIDTHNUM-NEXT: ret i32 [[ADD5]] +// +// BEWIDTHNUM-LABEL: @st6_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 +// BEWIDTHNUM-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 +// BEWIDTHNUM-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] +// BEWIDTHNUM-NEXT: ret i32 [[ADD4]] +// int st6_check_load(volatile struct st6 *m) { int x = m->a; x += m->b; @@ -374,7 +1185,7 @@ int st6_check_load(volatile struct st6 *m) { // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 // LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: store i8 2, i8* [[B]], align 2 +// LE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // LE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // LE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 @@ -390,7 +1201,7 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: store i8 2, i8* [[B]], align 2 +// BE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // BE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // BE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 @@ -398,6 +1209,102 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st6_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 
[[BF_CLEAR2]], 3 +// LENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st6_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st6_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// LEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 +// LEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st6_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st6_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], 
%struct.st6* [[M]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 +// LEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st6_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 +// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 +// BEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st6_check_store(struct st6 *m) { m->a = 1; m->b = 2; @@ -418,10 +1325,10 @@ struct st7b { // LE-LABEL: @st7_check_load( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 +// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 // LE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -435,10 +1342,10 @@ struct st7b { // BE-LABEL: @st7_check_load( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 +// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 // BE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -448,6 +1355,105 @@ struct st7b { // BE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] // BE-NEXT: ret i32 [[ADD3]] // +// LENUMLOADS-LABEL: @st7_check_load( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = 
load i8, i8* [[X]], align 4, !tbaa !8 +// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LENUMLOADS-NEXT: ret i32 [[ADD3]] +// +// BENUMLOADS-LABEL: @st7_check_load( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BENUMLOADS-NEXT: ret i32 [[ADD3]] +// +// LEWIDTH-LABEL: @st7_check_load( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LEWIDTH-NEXT: ret i32 [[ADD3]] +// +// BEWIDTH-LABEL: @st7_check_load( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], 
align 4, !tbaa !11 +// BEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BEWIDTH-NEXT: ret i32 [[ADD3]] +// +// LEWIDTHNUM-LABEL: @st7_check_load( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// LEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// LEWIDTHNUM-NEXT: ret i32 [[ADD3]] +// +// BEWIDTHNUM-LABEL: @st7_check_load( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 +// BEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] +// BEWIDTHNUM-NEXT: ret i32 [[ADD3]] +// int st7_check_load(struct st7b *m) { int r = m->x; r += m->y.a; @@ -458,9 +1464,9 @@ int st7_check_load(struct st7b *m) { // LE-LABEL: @st7_check_store( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: store i8 1, i8* [[X]], align 4 +// LE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: store volatile i8 2, i8* [[A]], align 4 +// LE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // LE-NEXT: 
[[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 @@ -471,9 +1477,9 @@ int st7_check_load(struct st7b *m) { // BE-LABEL: @st7_check_store( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: store i8 1, i8* [[X]], align 4 +// BE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: store volatile i8 2, i8* [[A]], align 4 +// BE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 @@ -481,6 +1487,84 @@ int st7_check_load(struct st7b *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @st7_check_store( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @st7_check_store( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @st7_check_store( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @st7_check_store( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[X:%.*]] = 
getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @st7_check_store( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// LEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 +// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @st7_check_store( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 +// BEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 +// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void st7_check_store(struct st7b *m) { m->x = 1; m->y.a = 2; @@ -504,6 +1588,42 @@ struct st8 { // BE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // BE-NEXT: ret i32 65535 // +// LENUMLOADS-LABEL: @st8_check_assignment( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret i32 65535 +// +// BENUMLOADS-LABEL: @st8_check_assignment( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret i32 65535 +// +// LEWIDTH-LABEL: @st8_check_assignment( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret i32 65535 +// +// BEWIDTH-LABEL: @st8_check_assignment( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* 
[[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret i32 65535 +// +// LEWIDTHNUM-LABEL: @st8_check_assignment( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret i32 65535 +// +// BEWIDTHNUM-LABEL: @st8_check_assignment( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret i32 65535 +// int st8_check_assignment(struct st8 *m) { return m->f = 0xffff; } @@ -526,6 +1646,50 @@ struct st9{ // BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // +// LENUMLOADS-LABEL: @read_st9( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// BENUMLOADS-LABEL: @read_st9( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// LEWIDTH-LABEL: @read_st9( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 +// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTH-LABEL: @read_st9( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 +// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// LEWIDTHNUM-LABEL: @read_st9( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 +// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTHNUM-LABEL: @read_st9( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 +// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// int read_st9(volatile struct st9 *m) { return m->f; } @@ -533,17 +1697,65 @@ int read_st9(volatile struct st9 *m) { // LE-LABEL: @store_st9( // LE-NEXT: entry: // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // LE-NEXT: ret void // // BE-LABEL: @store_st9( // BE-NEXT: entry: // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @store_st9( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @store_st9( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @store_st9( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @store_st9( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @store_st9( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @store_st9( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void store_st9(volatile struct st9 *m) { m->f = 1; } @@ -553,7 +1765,6 @@ void store_st9(volatile struct st9 *m) { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // LE-NEXT: ret void // @@ -562,10 +1773,75 @@ void store_st9(volatile struct st9 *m) { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // BE-NEXT: ret 
void // +// LENUMLOADS-LABEL: @increment_st9( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_st9( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_st9( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_st9( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_st9( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_st9( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: 
store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_st9(volatile struct st9 *m) { ++m->f; } @@ -593,6 +1869,56 @@ struct st10{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // +// LENUMLOADS-LABEL: @read_st10( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// BENUMLOADS-LABEL: @read_st10( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 +// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// LEWIDTH-LABEL: @read_st10( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 +// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTH-LABEL: @read_st10( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 +// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// LEWIDTHNUM-LABEL: @read_st10( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 +// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTHNUM-LABEL: @read_st10( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 +// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// int read_st10(volatile struct st10 *m) { return m->f; } @@ -615,6 +1941,60 @@ int read_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @store_st10( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2 +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @store_st10( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = 
getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @store_st10( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @store_st10( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @store_st10( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @store_st10( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void store_st10(volatile struct st10 *m) { m->f = 1; } @@ -643,6 +2023,78 @@ void store_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_st10( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_st10( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and 
i16 [[TMP1]], 32640 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_st10( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_st10( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 +// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_st10( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 +// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_st10( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 +// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_st10(volatile struct st10 *m) { ++m->f; } @@ -666,6 +2118,48 @@ struct st11{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // +// LENUMLOADS-LABEL: @read_st11( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// BENUMLOADS-LABEL: @read_st11( +// BENUMLOADS-NEXT: entry: +// 
BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] +// +// LEWIDTH-LABEL: @read_st11( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// LEWIDTH-NEXT: ret i32 [[BF_CAST]] +// +// BEWIDTH-LABEL: @read_st11( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// BEWIDTH-NEXT: ret i32 [[BF_CAST]] +// +// LEWIDTHNUM-LABEL: @read_st11( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// LEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] +// +// BEWIDTHNUM-LABEL: @read_st11( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 +// BEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] +// int read_st11(volatile struct st11 *m) { return m->f; } @@ -673,17 +2167,55 @@ int read_st11(volatile struct st11 *m) { // LE-LABEL: @store_st11( // LE-NEXT: entry: // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 1, i16* [[F]], align 1 // LE-NEXT: ret void // // BE-LABEL: @store_st11( // BE-NEXT: entry: // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 1, i16* [[F]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @store_st11( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @store_st11( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @store_st11( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @store_st11( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds 
[[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @store_st11( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @store_st11( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void store_st11(volatile struct st11 *m) { m->f = 1; } @@ -693,7 +2225,6 @@ void store_st11(volatile struct st11 *m) { // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // LE-NEXT: ret void // @@ -702,10 +2233,61 @@ void store_st11(volatile struct st11 *m) { // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_st11( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 +// LENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_st11( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 +// BENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_st11( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_st11( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i16 
[[INC]], i16* [[F]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_st11( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_st11( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_st11(volatile struct st11 *m) { ++m->f; } @@ -713,19 +2295,67 @@ void increment_st11(volatile struct st11 *m) { // LE-LABEL: @increment_e_st11( // LE-NEXT: entry: // LE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 +// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 // LE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 +// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 // LE-NEXT: ret void // // BE-LABEL: @increment_e_st11( // BE-NEXT: entry: // BE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 +// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 // BE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 +// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_e_st11( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_e_st11( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_e_st11( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_e_st11( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[E:%.*]] = 
getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_e_st11( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_e_st11( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// BEWIDTHNUM-NEXT: ret void +// void increment_e_st11(volatile struct st11 *m) { ++m->e; } @@ -751,6 +2381,54 @@ struct st12{ // BE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 // BE-NEXT: ret i32 [[BF_ASHR]] // +// LENUMLOADS-LABEL: @read_st12( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// LENUMLOADS-NEXT: ret i32 [[BF_ASHR]] +// +// BENUMLOADS-LABEL: @read_st12( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// BENUMLOADS-NEXT: ret i32 [[BF_ASHR]] +// +// LEWIDTH-LABEL: @read_st12( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTH-LABEL: @read_st12( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] +// +// LEWIDTHNUM-LABEL: @read_st12( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// +// BEWIDTHNUM-LABEL: @read_st12( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 +// 
BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 +// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] +// int read_st12(volatile struct st12 *m) { return m->f; } @@ -773,6 +2451,60 @@ int read_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @store_st12( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @store_st12( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @store_st12( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @store_st12( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @store_st12( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @store_st12( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void store_st12(volatile struct st12 *m) { m->f = 1; } @@ -801,6 +2533,78 @@ void store_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_st12( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], 
align 4 +// LENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_st12( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_st12( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_st12( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_st12( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_st12( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 +// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 
[[BF_LOAD1]], -16776961 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_st12(volatile struct st12 *m) { ++m->f; } @@ -829,6 +2633,78 @@ void increment_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_e_st12( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_e_st12( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_e_st12( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_e_st12( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_e_st12( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and 
i32 [[BF_LOAD1]], -256 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_e_st12( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_e_st12(volatile struct st12 *m) { ++m->e; } @@ -866,6 +2742,90 @@ struct st13 { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_b_st13( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_b_st13( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_b_st13( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_b_st13( +// 
BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_b_st13( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_b_st13( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_b_st13(volatile struct st13 *s) { s->b++; } @@ -879,7 +2839,6 @@ struct st14 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -888,10 +2847,61 @@ struct st14 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_a_st14( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* 
[[TMP0]], align 1 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_st14( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_st14( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_st14( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_st14( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_st14( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_a_st14(volatile struct st14 *s) { s->a++; } @@ -905,7 +2915,6 @@ struct st15 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -914,10 +2923,61 @@ struct st15 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_a_st15( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* 
[[TMP0]], align 1 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_st15( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_st15( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_st15( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_st15( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_st15( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_a_st15(volatile struct st15 *s) { s->a++; } @@ -955,6 +3015,84 @@ struct st16 { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_a_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = 
load i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_a_st16(struct st16 *s) { s->a++; } @@ -987,6 +3125,90 @@ void increment_a_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// 
LENUMLOADS-LABEL: @increment_b_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_b_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_b_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_b_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_b_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// 
LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_b_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_b_st16(struct st16 *s) { s->b++; } @@ -1019,6 +3241,90 @@ void increment_b_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_c_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_c_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_c_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void 
+// +// BEWIDTH-LABEL: @increment_c_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_c_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_c_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_c_st16(struct st16 *s) { s->c++; } @@ -1053,6 +3359,96 @@ void increment_c_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_d_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], 
align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_d_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_d_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_d_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_d_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_d_st16( +// BEWIDTHNUM-NEXT: entry: +// 
BEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_d_st16(struct st16 *s) { s->d++; } @@ -1085,6 +3481,68 @@ void increment_d_st16(struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_a_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_a_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_a_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_a_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_a_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// 
LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_a_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_a_st16(volatile struct st16 *s) { s->a++; } @@ -1119,6 +3577,88 @@ void increment_v_a_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_b_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_b_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_b_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_b_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast 
%struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_b_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_b_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_b_st16(volatile struct st16 *s) { s->b++; } @@ -1153,6 +3693,74 @@ void increment_v_b_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_c_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_c_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* 
[[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_c_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_c_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_c_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_c_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_c_st16(volatile struct st16 *s) { s->c++; } @@ -1189,6 +3797,90 @@ void increment_v_c_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_d_st16( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 +// LENUMLOADS-NEXT: 
[[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 +// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_d_st16( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 +// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 +// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] +// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_d_st16( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_d_st16( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_d_st16( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// 
LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_d_st16( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_d_st16(volatile struct st16 *s) { s->d++; } @@ -1227,6 +3919,90 @@ char c : 8; // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_b_st17( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_b_st17( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_b_st17( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_b_st17( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BEWIDTH-NEXT: 
[[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_b_st17( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_b_st17( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 +// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_b_st17(volatile struct st17 *s) { s->b++; } @@ -1259,6 +4035,458 @@ void increment_v_b_st17(volatile struct st17 *s) { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // +// LENUMLOADS-LABEL: @increment_v_c_st17( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 +// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_v_c_st17( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* +// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] +// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_v_c_st17( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_v_c_st17( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_v_c_st17( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_v_c_st17( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 +// BEWIDTHNUM-NEXT: ret void +// void increment_v_c_st17(volatile struct st17 *s) { s->c++; } + +// A zero bitfield should block, as the C11 specification +// requires a and b to be different memory positions +struct zero_bitfield { + int a : 8; + char : 0; + int b : 8; +}; + +// LE-LABEL: @increment_a_zero_bitfield( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_a_zero_bitfield( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_a_zero_bitfield( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: 
[[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_zero_bitfield( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_zero_bitfield( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_zero_bitfield( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// +void increment_a_zero_bitfield(volatile struct zero_bitfield *s) { + s->a++; +} + +// LE-LABEL: @increment_b_zero_bitfield( +// LE-NEXT: entry: +// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_b_zero_bitfield( +// BE-NEXT: entry: +// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile 
i8, i8* [[B]], align 1 +// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_b_zero_bitfield( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 +// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_b_zero_bitfield( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 +// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_b_zero_bitfield( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_b_zero_bitfield( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 +// BEWIDTHNUM-NEXT: ret void +// +void increment_b_zero_bitfield(volatile struct zero_bitfield *s) { + s->b++; +} + +// The zero bitfield here does not affect +struct zero_bitfield_ok { + short a : 8; + char a1 : 8; + long : 0; + int b : 24; +}; + +// LE-LABEL: @increment_a_zero_bitfield_ok( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* 
[[TMP0]], align 4 +// LE-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 +// LE-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 +// LE-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] +// LE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// LE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LE-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 +// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 +// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] +// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_a_zero_bitfield_ok( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BE-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 +// BE-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] +// BE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// BE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 +// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] +// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 +// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 +// LENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] +// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// LENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] +// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 +// BENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] +// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 +// BENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 
[[BF_CLEAR]], [[TMP2]] +// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_a_zero_bitfield_ok( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// LEWIDTH-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// LEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_a_zero_bitfield_ok( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BEWIDTH-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// BEWIDTH-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// BEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 +// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// LEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 +// BEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 +// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* +// BEWIDTHNUM-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] +// BEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 +// BEWIDTHNUM-NEXT: ret void +// +void increment_a_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { + s->a1 += s->a; +} + +// LE-LABEL: 
@increment_b_zero_bitfield_ok( +// LE-NEXT: entry: +// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LE-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LE-NEXT: ret void +// +// BE-LABEL: @increment_b_zero_bitfield_ok( +// BE-NEXT: entry: +// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BE-NEXT: ret void +// +// LENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( +// LENUMLOADS-NEXT: entry: +// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LENUMLOADS-NEXT: ret void +// +// BENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( +// BENUMLOADS-NEXT: entry: +// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BENUMLOADS-NEXT: ret void +// +// LEWIDTH-LABEL: @increment_b_zero_bitfield_ok( +// LEWIDTH-NEXT: entry: +// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// 
LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTH-NEXT: ret void +// +// BEWIDTH-LABEL: @increment_b_zero_bitfield_ok( +// BEWIDTH-NEXT: entry: +// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTH-NEXT: ret void +// +// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( +// LEWIDTHNUM-NEXT: entry: +// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 +// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 +// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 +// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] +// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// LEWIDTHNUM-NEXT: ret void +// +// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( +// BEWIDTHNUM-NEXT: entry: +// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 +// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* +// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 +// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 +// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 +// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] +// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 +// BEWIDTHNUM-NEXT: ret void +// +void increment_b_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { + s->b++; +} diff --git a/clang/test/CodeGen/bitfield-2.c b/clang/test/CodeGen/bitfield-2.c index 9d669575ecd117..661d42683bc276 100644 --- a/clang/test/CodeGen/bitfield-2.c +++ b/clang/test/CodeGen/bitfield-2.c @@ -14,7 +14,7 @@ // CHECK-RECORD: LLVMType:%struct.s0 = type { [3 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: +// CHECK-RECORD: -// CHECK-RECORD: +// CHECK-RECORD: +// CHECK-RECORD: -// CHECK-RECORD: +// CHECK-RECORD: Date: Tue, 8 Sep 2020 11:26:10 -0500 Subject: [PATCH 094/161] [GVN] Add testcase that uses masked loads and stores, NFC --- llvm/test/Transforms/GVN/masked-load-store.ll | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 
llvm/test/Transforms/GVN/masked-load-store.ll diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll new file mode 100644 index 00000000000000..8119d77bb76e05 --- /dev/null +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -gvn -S < %s | FileCheck %s + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 +; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]] +; CHECK-NEXT: ret <128 x i8> [[V4]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = getelementptr <128 x i8>, <128 x i8>* %a0, i32 1 + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a2, <128 x i8>* %v1, i32 4, <128 x i1> %v0) + %v3 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v4 = add <128 x i8> %v2, %v3 + ret <128 x i8> %v4 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) + From 97e77ac0ed80877cda58b1dddf98890cc7b0d167 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 8 Sep 2020 16:53:24 +0000 Subject: [PATCH 095/161] Add more explicit error message when creating a type or attribute for an unregistered dialect (NFC) Differential Revision: https://reviews.llvm.org/D87177 --- mlir/include/mlir/IR/AttributeSupport.h | 17 +++++++++++++++++ mlir/include/mlir/IR/TypeSupport.h | 15 +++++++++++++++ mlir/include/mlir/Support/StorageUniquer.h | 10 ++++++++++ mlir/lib/Support/StorageUniquer.cpp | 16 ++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/mlir/include/mlir/IR/AttributeSupport.h b/mlir/include/mlir/IR/AttributeSupport.h index 35084a20493f58..c0e3a0bb9b26e0 100644 --- a/mlir/include/mlir/IR/AttributeSupport.h +++ b/mlir/include/mlir/IR/AttributeSupport.h @@ -16,6 +16,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/Twine.h" 
namespace mlir { class MLIRContext; @@ -142,6 +143,14 @@ class AttributeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isParametricStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get( [ctx](AttributeStorage *storage) { initializeAttributeStorage(storage, ctx, T::getTypeID()); @@ -153,6 +162,14 @@ class AttributeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getAttributeUniquer().isSingletonStorageInitialized( + T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create Attribute '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getAttributeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/IR/TypeSupport.h b/mlir/include/mlir/IR/TypeSupport.h index ace5eaa733454d..c1de5895791541 100644 --- a/mlir/include/mlir/IR/TypeSupport.h +++ b/mlir/include/mlir/IR/TypeSupport.h @@ -15,6 +15,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/StorageUniquerSupport.h" +#include "llvm/ADT/Twine.h" namespace mlir { class Dialect; @@ -126,6 +127,13 @@ struct TypeUniquer { static typename std::enable_if_t< !std::is_same::value, T> get(MLIRContext *ctx, Args &&...args) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isParametricStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get( [&](TypeStorage *storage) { storage->initialize(AbstractType::lookup(T::getTypeID(), ctx)); @@ -137,6 +145,13 @@ struct TypeUniquer { static typename std::enable_if_t< std::is_same::value, T> get(MLIRContext *ctx) { +#ifndef NDEBUG + if (!ctx->getTypeUniquer().isSingletonStorageInitialized(T::getTypeID())) + llvm::report_fatal_error(llvm::Twine("can't create type '") + + llvm::getTypeName() + + "' because storage uniquer isn't initialized: " + "the dialect was likely not loaded."); +#endif return ctx->getTypeUniquer().get(T::getTypeID()); } diff --git a/mlir/include/mlir/Support/StorageUniquer.h b/mlir/include/mlir/Support/StorageUniquer.h index eb04688be19026..d0a6170805bfdf 100644 --- a/mlir/include/mlir/Support/StorageUniquer.h +++ b/mlir/include/mlir/Support/StorageUniquer.h @@ -210,6 +210,16 @@ class StorageUniquer { return get(TypeID::get()); } + /// Test if there is a singleton storage uniquer initialized for the provided + /// TypeID. This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isSingletonStorageInitialized(TypeID id); + + /// Test if there is a parametric storage uniquer initialized for the provided + /// TypeID. This is only useful for debugging/diagnostic purpose: the uniquer + /// is initialized when a dialect is loaded. + bool isParametricStorageInitialized(TypeID id); + /// Changes the mutable component of 'storage' by forwarding the trailing /// arguments to the 'mutate' function of the derived class. 
template diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp index 73578b5c91acf2..a3e296e99e7389 100644 --- a/mlir/lib/Support/StorageUniquer.cpp +++ b/mlir/lib/Support/StorageUniquer.cpp @@ -89,6 +89,9 @@ struct StorageUniquerImpl { // Parametric Storage //===--------------------------------------------------------------------===// + /// Check if an instance of a parametric storage class exists. + bool hasParametricStorage(TypeID id) { return parametricUniquers.count(id); } + /// Get or create an instance of a parametric type. BaseStorage * getOrCreate(TypeID id, unsigned hashValue, @@ -176,6 +179,9 @@ struct StorageUniquerImpl { return singletonInstance; } + /// Check if an instance of a singleton storage class exists. + bool hasSingleton(TypeID id) { return singletonInstances.count(id); } + //===--------------------------------------------------------------------===// // Instance Storage //===--------------------------------------------------------------------===// @@ -227,6 +233,16 @@ auto StorageUniquer::getSingletonImpl(TypeID id) -> BaseStorage * { return impl->getSingleton(id); } +/// Test is the storage singleton is initialized. +bool StorageUniquer::isSingletonStorageInitialized(TypeID id) { + return impl->hasSingleton(id); +} + +/// Test is the parametric storage is initialized. +bool StorageUniquer::isParametricStorageInitialized(TypeID id) { + return impl->hasParametricStorage(id); +} + /// Implementation for registering an instance of a derived type with default /// storage. void StorageUniquer::registerSingletonImpl( From 2d7fd38cf7db18edbbfa0e6dfb7454a255171867 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 4 Sep 2020 19:19:20 -0700 Subject: [PATCH 096/161] [sanitizers] Remove unneeded MaybeCall*DefaultOptions() and nullptr checks D28596 added SANITIZER_INTERFACE_WEAK_DEF which can guarantee `*_default_options` are always defined. The weak attributes on the `__{asan,lsan,msan,ubsan}_default_options` declarations can thus be removed. `MaybeCall*DefaultOptions` no longer need nullptr checks, so their call sites can just be replaced by `__*_default_options`. Reviewed By: #sanitizers, vitalybuka Differential Revision: https://reviews.llvm.org/D87175 --- compiler-rt/lib/asan/asan_flags.cpp | 10 +++------- compiler-rt/lib/asan/asan_interface_internal.h | 4 ++-- compiler-rt/lib/cfi/cfi.cpp | 2 +- compiler-rt/lib/hwasan/hwasan.cpp | 2 +- compiler-rt/lib/lsan/lsan.cpp | 2 +- compiler-rt/lib/lsan/lsan_common.cpp | 9 ++------- compiler-rt/lib/msan/msan.cpp | 14 +++++--------- compiler-rt/lib/msan/msan_interface_internal.h | 4 ++-- compiler-rt/lib/tsan/rtl/tsan_flags.cpp | 2 +- compiler-rt/lib/ubsan/ubsan_flags.cpp | 6 +----- compiler-rt/lib/ubsan/ubsan_flags.h | 2 -- 11 files changed, 19 insertions(+), 38 deletions(-) diff --git a/compiler-rt/lib/asan/asan_flags.cpp b/compiler-rt/lib/asan/asan_flags.cpp index c5c70eaed737fe..cb6a89fe32ce75 100644 --- a/compiler-rt/lib/asan/asan_flags.cpp +++ b/compiler-rt/lib/asan/asan_flags.cpp @@ -26,10 +26,6 @@ namespace __asan { Flags asan_flags_dont_use_directly; // use via flags(). -static const char *MaybeCallAsanDefaultOptions() { - return (&__asan_default_options) ? __asan_default_options() : ""; -} - static const char *MaybeUseAsanDefaultOptionsCompileDefinition() { #ifdef ASAN_DEFAULT_OPTIONS return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS); @@ -108,14 +104,14 @@ void InitializeFlags() { asan_parser.ParseString(asan_compile_def); // Override from user-specified string. 
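For context on the call sites above: __asan_default_options is the user-overridable hook these option parsers consult. A minimal sketch of how an application provides one — the option string itself is only an example:

    // app.cpp -- optional strong definition supplied by the program under test.
    // When it is absent, the runtime's SANITIZER_INTERFACE_WEAK_DEF default
    // (returning "") is linked in instead, so the hook is always defined.
    extern "C" const char *__asan_default_options() {
      return "verbosity=1:detect_leaks=0";
    }

Because a definition is now guaranteed by the weak default, the runtime can call the hook unconditionally, which is what makes the (&__asan_default_options) null check above removable.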
- const char *asan_default_options = MaybeCallAsanDefaultOptions(); + const char *asan_default_options = __asan_default_options(); asan_parser.ParseString(asan_default_options); #if CAN_SANITIZE_UB - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif #if CAN_SANITIZE_LEAKS - const char *lsan_default_options = __lsan::MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); lsan_parser.ParseString(lsan_default_options); #endif diff --git a/compiler-rt/lib/asan/asan_interface_internal.h b/compiler-rt/lib/asan/asan_interface_internal.h index f14cbbcb76a358..3e6e6602887465 100644 --- a/compiler-rt/lib/asan/asan_interface_internal.h +++ b/compiler-rt/lib/asan/asan_interface_internal.h @@ -173,8 +173,8 @@ extern "C" { SANITIZER_INTERFACE_ATTRIBUTE void __asan_print_accumulated_stats(); - SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE - const char* __asan_default_options(); + SANITIZER_INTERFACE_ATTRIBUTE + const char *__asan_default_options(); SANITIZER_INTERFACE_ATTRIBUTE extern uptr __asan_shadow_memory_dynamic_address; diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp index fd48f71643b6fe..b75c72b215c275 100644 --- a/compiler-rt/lib/cfi/cfi.cpp +++ b/compiler-rt/lib/cfi/cfi.cpp @@ -379,7 +379,7 @@ void InitializeFlags() { __ubsan::RegisterUbsanFlags(&ubsan_parser, uf); RegisterCommonFlags(&ubsan_parser); - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS"); #endif diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index 11b4d3891bc2cf..c5322110cb662a 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -112,7 +112,7 @@ static void InitializeFlags() { if (__hwasan_default_options) parser.ParseString(__hwasan_default_options()); #if HWASAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif diff --git a/compiler-rt/lib/lsan/lsan.cpp b/compiler-rt/lib/lsan/lsan.cpp index 80a6e2fa70169d..c8cc045783d451 100644 --- a/compiler-rt/lib/lsan/lsan.cpp +++ b/compiler-rt/lib/lsan/lsan.cpp @@ -73,7 +73,7 @@ static void InitializeFlags() { RegisterCommonFlags(&parser); // Override from user-specified string. - const char *lsan_default_options = MaybeCallLsanDefaultOptions(); + const char *lsan_default_options = __lsan_default_options(); parser.ParseString(lsan_default_options); parser.ParseStringFromEnv("LSAN_OPTIONS"); diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 67f85f2f31de46..93ce0ddc3d68e5 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -110,10 +110,6 @@ void InitializeRootRegions() { root_regions = new (placeholder) InternalMmapVector(); } -const char *MaybeCallLsanDefaultOptions() { - return (&__lsan_default_options) ? 
__lsan_default_options() : ""; -} - void InitCommonLsan() { InitializeRootRegions(); if (common_flags()->detect_leaks) { @@ -900,12 +896,11 @@ int __lsan_do_recoverable_leak_check() { return 0; } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char * __lsan_default_options() { +SANITIZER_INTERFACE_WEAK_DEF(const char *, __lsan_default_options, void) { return ""; } +#if !SANITIZER_SUPPORTS_WEAK_HOOKS SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int __lsan_is_turned_off() { return 0; diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 3028f79f041c33..d651a376789bd7 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -172,10 +172,9 @@ static void InitializeFlags() { #endif // Override from user-specified string. - if (__msan_default_options) - parser.ParseString(__msan_default_options()); + parser.ParseString(__msan_default_options()); #if MSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif @@ -726,12 +725,9 @@ void __msan_finish_switch_fiber(const void **bottom_old, uptr *size_old) { } } -#if !SANITIZER_SUPPORTS_WEAK_HOOKS -extern "C" { -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -const char* __msan_default_options() { return ""; } -} // extern "C" -#endif +SANITIZER_INTERFACE_WEAK_DEF(const char *, __msan_default_options, void) { + return ""; +} extern "C" { SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h index 17922a888b9c91..1edacbc7504f5d 100644 --- a/compiler-rt/lib/msan/msan_interface_internal.h +++ b/compiler-rt/lib/msan/msan_interface_internal.h @@ -129,8 +129,8 @@ void __msan_set_keep_going(int keep_going); SANITIZER_INTERFACE_ATTRIBUTE int __msan_set_poison_in_malloc(int do_poison); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -/* OPTIONAL */ const char* __msan_default_options(); +SANITIZER_INTERFACE_ATTRIBUTE +const char *__msan_default_options(); // For testing. SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp index 44bf325cd35bb6..49e4a9c21da9c7 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -87,7 +87,7 @@ void InitializeFlags(Flags *f, const char *env, const char *env_option_name) { // Let a frontend override. parser.ParseString(__tsan_default_options()); #if TSAN_CONTAINS_UBSAN - const char *ubsan_default_options = __ubsan::MaybeCallUbsanDefaultOptions(); + const char *ubsan_default_options = __ubsan_default_options(); ubsan_parser.ParseString(ubsan_default_options); #endif // Override from command line. diff --git a/compiler-rt/lib/ubsan/ubsan_flags.cpp b/compiler-rt/lib/ubsan/ubsan_flags.cpp index 721c2273f133a3..25cefd46ce27ce 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.cpp +++ b/compiler-rt/lib/ubsan/ubsan_flags.cpp @@ -21,10 +21,6 @@ namespace __ubsan { -const char *MaybeCallUbsanDefaultOptions() { - return (&__ubsan_default_options) ? __ubsan_default_options() : ""; -} - static const char *GetFlag(const char *flag) { // We cannot call getenv() from inside a preinit array initializer if (SANITIZER_CAN_USE_PREINIT_ARRAY) { @@ -66,7 +62,7 @@ void InitializeFlags() { RegisterUbsanFlags(&parser, f); // Override from user-specified string. 
- parser.ParseString(MaybeCallUbsanDefaultOptions()); + parser.ParseString(__ubsan_default_options()); // Override from environment variable. parser.ParseStringFromEnv("UBSAN_OPTIONS"); InitializeCommonFlags(); diff --git a/compiler-rt/lib/ubsan/ubsan_flags.h b/compiler-rt/lib/ubsan/ubsan_flags.h index daa0d7c701e041..c47009bafe5399 100644 --- a/compiler-rt/lib/ubsan/ubsan_flags.h +++ b/compiler-rt/lib/ubsan/ubsan_flags.h @@ -34,8 +34,6 @@ inline Flags *flags() { return &ubsan_flags; } void InitializeFlags(); void RegisterUbsanFlags(FlagParser *parser, Flags *f); -const char *MaybeCallUbsanDefaultOptions(); - } // namespace __ubsan extern "C" { From 0dacf3b5ac3a8c4079b781c788f758709345883f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 18:04:41 +0100 Subject: [PATCH 097/161] RISCVMatInt.h - remove unnecessary includes. NFCI. Add APInt forward declaration and move include to RISCVMatInt.cpp --- llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp | 4 +--- llvm/lib/Target/RISCV/Utils/RISCVMatInt.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp index f390ddb89e3c9a..1f3dead6101121 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.cpp @@ -8,10 +8,8 @@ #include "RISCVMatInt.h" #include "MCTargetDesc/RISCVMCTargetDesc.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/MathExtras.h" -#include namespace llvm { diff --git a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h index b12ae2eade9993..17ca57458b4938 100644 --- a/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h +++ b/llvm/lib/Target/RISCV/Utils/RISCVMatInt.h @@ -9,12 +9,11 @@ #ifndef LLVM_LIB_TARGET_RISCV_MATINT_H #define LLVM_LIB_TARGET_RISCV_MATINT_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MachineValueType.h" #include namespace llvm { +class APInt; namespace RISCVMatInt { struct Inst { From cd5c5c484830e65854cc12cb64a0feb0a9060734 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 8 Sep 2020 18:24:52 +0100 Subject: [PATCH 098/161] CFGUpdate.h - remove unused APInt include. NFCI. --- llvm/include/llvm/Support/CFGUpdate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Support/CFGUpdate.h b/llvm/include/llvm/Support/CFGUpdate.h index af4cd6ed1f1df4..3a12b9d86c18a8 100644 --- a/llvm/include/llvm/Support/CFGUpdate.h +++ b/llvm/include/llvm/Support/CFGUpdate.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_CFGUPDATE_H #define LLVM_SUPPORT_CFGUPDATE_H -#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/Compiler.h" From d6f3f612318f31c46b95dd62eee45a75397ccfcf Mon Sep 17 00:00:00 2001 From: Ties Stuij Date: Tue, 8 Sep 2020 18:43:59 +0100 Subject: [PATCH 099/161] Revert "[ARM] Follow AACPS standard for volatile bit-fields access width" This reverts commit 514df1b2bb1ecd1a33327001ea38a347fd2d0380. 
Some of the buildbots got llvm-lit errors on CodeGen/volatile.c --- clang/include/clang/Basic/CodeGenOptions.def | 6 +- clang/include/clang/Driver/Options.td | 8 +- clang/lib/CodeGen/CGExpr.cpp | 118 +- clang/lib/CodeGen/CGRecordLayout.h | 17 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 166 +- clang/lib/Frontend/CompilerInvocation.cpp | 3 - clang/test/CodeGen/aapcs-bitfield.c | 3292 +----------------- clang/test/CodeGen/bitfield-2.c | 12 +- 8 files changed, 103 insertions(+), 3519 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index f2f29db2334e44..ec77f68062e7a1 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -392,13 +392,9 @@ CODEGENOPT(Addrsig, 1, 0) /// Whether to emit unused static constants. CODEGENOPT(KeepStaticConsts, 1, 0) -/// Whether to follow the AAPCS enforcing at least one read before storing to a volatile bitfield +/// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) -/// Whether to not follow the AAPCS that enforces volatile bit-field access width to be -/// according to the field declaring type width. -CODEGENOPT(AAPCSBitfieldWidth, 1, 1) - #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 81d63330b4279b..4ba5d40117e77c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2363,15 +2363,9 @@ def mno_neg_immediates: Flag<["-"], "mno-neg-immediates">, Group, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Allow use of CMSE (Armv8-M Security Extensions)">; -def ForceAAPCSBitfieldLoad : Flag<["-"], "faapcs-bitfield-load">, Group, +def ForceAAPCSBitfieldLoad : Flag<["-"], "fAAPCSBitfieldLoad">, Group, Flags<[DriverOption,CC1Option]>, HelpText<"Follows the AAPCS standard that all volatile bit-field write generates at least one load. (ARM only).">; -def ForceNoAAPCSBitfieldWidth : Flag<["-"], "fno-aapcs-bitfield-width">, Group, - Flags<[DriverOption,CC1Option]>, - HelpText<"Do not follow the AAPCS standard requirement that volatile bit-field width is dictated by the field container type. (ARM only).">; -def AAPCSBitfieldWidth : Flag<["-"], "faapcs-bitfield-width">, Group, - Flags<[DriverOption,CC1Option]>, - HelpText<"Follow the AAPCS standard requirement stating that volatile bit-field width is dictated by the field container type. (ARM only).">; def mgeneral_regs_only : Flag<["-"], "mgeneral-regs-only">, Group, HelpText<"Generate code which only uses the general purpose registers (AArch64 only)">; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index df024a84462dbd..7351926035e64d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1927,27 +1927,22 @@ RValue CodeGenFunction::EmitLoadOfBitfieldLValue(LValue LV, llvm::Type *ResLTy = ConvertType(LV.getType()); Address Ptr = LV.getBitFieldAddress(); - llvm::Value *Val = - Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); - - bool UseVolatile = LV.isVolatileQualified() && - Info.VolatileStorageSize != 0 && isAAPCS(CGM.getTarget()); - const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; - const unsigned StorageSize = - UseVolatile ? 
Info.VolatileStorageSize : Info.StorageSize; + llvm::Value *Val = Builder.CreateLoad(Ptr, LV.isVolatileQualified(), "bf.load"); + if (Info.IsSigned) { - assert(static_cast(Offset + Info.Size) <= StorageSize); - unsigned HighBits = StorageSize - Offset - Info.Size; + assert(static_cast(Info.Offset + Info.Size) <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Offset - Info.Size; if (HighBits) Val = Builder.CreateShl(Val, HighBits, "bf.shl"); - if (Offset + HighBits) - Val = Builder.CreateAShr(Val, Offset + HighBits, "bf.ashr"); + if (Info.Offset + HighBits) + Val = Builder.CreateAShr(Val, Info.Offset + HighBits, "bf.ashr"); } else { - if (Offset) - Val = Builder.CreateLShr(Val, Offset, "bf.lshr"); - if (static_cast(Offset) + Info.Size < StorageSize) - Val = Builder.CreateAnd( - Val, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), "bf.clear"); + if (Info.Offset) + Val = Builder.CreateLShr(Val, Info.Offset, "bf.lshr"); + if (static_cast(Info.Offset) + Info.Size < Info.StorageSize) + Val = Builder.CreateAnd(Val, llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.clear"); } Val = Builder.CreateIntCast(Val, ResLTy, Info.IsSigned, "bf.cast"); EmitScalarRangeCheck(Val, LV.getType(), Loc); @@ -2149,43 +2144,39 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, /*isSigned=*/false); llvm::Value *MaskedVal = SrcVal; - const bool UseVolatile = CGM.getCodeGenOpts().AAPCSBitfieldWidth && - Dst.isVolatileQualified() && - Info.VolatileStorageSize != 0 && - isAAPCS(CGM.getTarget()); - const unsigned StorageSize = - UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; - const unsigned Offset = UseVolatile ? Info.VolatileOffset : Info.Offset; // See if there are other bits in the bitfield's storage we'll need to load // and mask together with source before storing. - if (StorageSize != Info.Size) { - assert(StorageSize > Info.Size && "Invalid bitfield size."); + if (Info.StorageSize != Info.Size) { + assert(Info.StorageSize > Info.Size && "Invalid bitfield size."); llvm::Value *Val = - Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); + Builder.CreateLoad(Ptr, Dst.isVolatileQualified(), "bf.load"); // Mask the source value as needed. if (!hasBooleanRepresentation(Dst.getType())) - SrcVal = Builder.CreateAnd( - SrcVal, llvm::APInt::getLowBitsSet(StorageSize, Info.Size), - "bf.value"); + SrcVal = Builder.CreateAnd(SrcVal, + llvm::APInt::getLowBitsSet(Info.StorageSize, + Info.Size), + "bf.value"); MaskedVal = SrcVal; - if (Offset) - SrcVal = Builder.CreateShl(SrcVal, Offset, "bf.shl"); + if (Info.Offset) + SrcVal = Builder.CreateShl(SrcVal, Info.Offset, "bf.shl"); // Mask out the original value. - Val = Builder.CreateAnd( - Val, ~llvm::APInt::getBitsSet(StorageSize, Offset, Offset + Info.Size), - "bf.clear"); + Val = Builder.CreateAnd(Val, + ~llvm::APInt::getBitsSet(Info.StorageSize, + Info.Offset, + Info.Offset + Info.Size), + "bf.clear"); // Or together the unchanged values and the source value. SrcVal = Builder.CreateOr(Val, SrcVal, "bf.set"); } else { - assert(Offset == 0); + assert(Info.Offset == 0); // According to the AACPS: // When a volatile bit-field is written, and its container does not overlap - // with any non-bit-field member, its container must be read exactly once - // and written exactly once using the access width appropriate to the type - // of the container. The two accesses are not atomic. 
+ // with any non-bit-field member, its container must be read exactly once and + // written exactly once using the access width appropriate to the type of the + // container. The two accesses are not atomic. if (Dst.isVolatileQualified() && isAAPCS(CGM.getTarget()) && CGM.getCodeGenOpts().ForceAAPCSBitfieldLoad) Builder.CreateLoad(Ptr, true, "bf.load"); @@ -2200,8 +2191,8 @@ void CodeGenFunction::EmitStoreThroughBitfieldLValue(RValue Src, LValue Dst, // Sign extend the value if needed. if (Info.IsSigned) { - assert(Info.Size <= StorageSize); - unsigned HighBits = StorageSize - Info.Size; + assert(Info.Size <= Info.StorageSize); + unsigned HighBits = Info.StorageSize - Info.Size; if (HighBits) { ResultVal = Builder.CreateShl(ResultVal, HighBits, "bf.result.shl"); ResultVal = Builder.CreateAShr(ResultVal, HighBits, "bf.result.ashr"); @@ -4213,45 +4204,32 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, if (field->isBitField()) { const CGRecordLayout &RL = - CGM.getTypes().getCGRecordLayout(field->getParent()); + CGM.getTypes().getCGRecordLayout(field->getParent()); const CGBitFieldInfo &Info = RL.getBitFieldInfo(field); - const bool UseVolatile = isAAPCS(CGM.getTarget()) && - CGM.getCodeGenOpts().AAPCSBitfieldWidth && - Info.VolatileStorageSize != 0 && - field->getType() - .withCVRQualifiers(base.getVRQualifiers()) - .isVolatileQualified(); Address Addr = base.getAddress(*this); unsigned Idx = RL.getLLVMFieldNo(field); const RecordDecl *rec = field->getParent(); - if (!UseVolatile) { - if (!IsInPreservedAIRegion && - (!getDebugInfo() || !rec->hasAttr())) { - if (Idx != 0) - // For structs, we GEP to the field that the record layout suggests. - Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); - } else { - llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( - getContext().getRecordType(rec), rec->getLocation()); - Addr = Builder.CreatePreserveStructAccessIndex( - Addr, Idx, getDebugInfoFIndex(rec, field->getFieldIndex()), - DbgInfo); - } + if (!IsInPreservedAIRegion && + (!getDebugInfo() || !rec->hasAttr())) { + if (Idx != 0) + // For structs, we GEP to the field that the record layout suggests. + Addr = Builder.CreateStructGEP(Addr, Idx, field->getName()); + } else { + llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateRecordType( + getContext().getRecordType(rec), rec->getLocation()); + Addr = Builder.CreatePreserveStructAccessIndex(Addr, Idx, + getDebugInfoFIndex(rec, field->getFieldIndex()), + DbgInfo); } - const unsigned SS = - UseVolatile ? Info.VolatileStorageSize : Info.StorageSize; + // Get the access type. - llvm::Type *FieldIntTy = llvm::Type::getIntNTy(getLLVMContext(), SS); + llvm::Type *FieldIntTy = + llvm::Type::getIntNTy(getLLVMContext(), Info.StorageSize); if (Addr.getElementType() != FieldIntTy) Addr = Builder.CreateElementBitCast(Addr, FieldIntTy); - if (UseVolatile) { - const unsigned VolatileOffset = Info.VolatileStorageOffset.getQuantity(); - if (VolatileOffset) - Addr = Builder.CreateConstInBoundsGEP(Addr, VolatileOffset); - } QualType fieldType = - field->getType().withCVRQualifiers(base.getVRQualifiers()); + field->getType().withCVRQualifiers(base.getVRQualifiers()); // TODO: Support TBAA for bit fields. 
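To make the shift arithmetic in EmitLoadOfBitfieldLValue above concrete, here is a small standalone sketch of the same extraction for a signed 5-bit field at bit offset 3 of a 16-bit container; the sizes and values are chosen purely for illustration:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint16_t container = 0x00F8; // bits [7:3] hold 0b11111, i.e. -1 as a signed 5-bit field
      unsigned offset = 3, size = 5, storageSize = 16;

      // Signed case: shift the field up so its top bit becomes the container's
      // sign bit, then arithmetic-shift back down to extract and sign-extend in
      // one step (the "bf.shl" followed by "bf.ashr" emitted above).
      unsigned highBits = storageSize - offset - size;                 // 8
      int16_t val = (int16_t)(container << highBits) >> (offset + highBits);

      printf("%d\n", val); // prints -1
      return 0;
    }

The unsigned path is the mirror image: a logical shift right by the offset followed by an 'and' with the low Info.Size bits (the "bf.lshr" and "bf.clear" values above).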
LValueBaseInfo FieldBaseInfo(BaseInfo.getAlignmentSource()); return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo, diff --git a/clang/lib/CodeGen/CGRecordLayout.h b/clang/lib/CodeGen/CGRecordLayout.h index e6665b72bcba15..730ee4c438e7e0 100644 --- a/clang/lib/CodeGen/CGRecordLayout.h +++ b/clang/lib/CodeGen/CGRecordLayout.h @@ -46,7 +46,7 @@ namespace CodeGen { /// }; /// /// This will end up as the following LLVM type. The first array is the -/// bitfield, and the second is the padding out to a 4-byte alignment. +/// bitfield, and the second is the padding out to a 4-byte alignmnet. /// /// %t = type { i8, i8, i8, i8, i8, [3 x i8] } /// @@ -80,21 +80,8 @@ struct CGBitFieldInfo { /// The offset of the bitfield storage from the start of the struct. CharUnits StorageOffset; - /// The offset within a contiguous run of bitfields that are represented as a - /// single "field" within the LLVM struct type, taking into account the AAPCS - /// rules for volatile bitfields. This offset is in bits. - unsigned VolatileOffset : 16; - - /// The storage size in bits which should be used when accessing this - /// bitfield. - unsigned VolatileStorageSize; - - /// The offset of the bitfield storage from the start of the struct. - CharUnits VolatileStorageOffset; - CGBitFieldInfo() - : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset(), - VolatileOffset(), VolatileStorageSize(), VolatileStorageOffset() {} + : Offset(), Size(), IsSigned(), StorageSize(), StorageOffset() {} CGBitFieldInfo(unsigned Offset, unsigned Size, bool IsSigned, unsigned StorageSize, CharUnits StorageOffset) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index ce35880106c20f..4e5d1d3f16f65b 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -109,14 +109,6 @@ struct CGRecordLowering { D->isMsStruct(Context); } - /// Helper function to check if we are targeting AAPCS. - bool isAAPCS() const { - return Context.getTargetInfo().getABI().startswith("aapcs"); - } - - /// Helper function to check if the target machine is BigEndian. - bool isBE() const { return Context.getTargetInfo().isBigEndian(); } - /// The Itanium base layout rule allows virtual bases to overlap /// other bases, which complicates layout in specific ways. /// @@ -180,8 +172,7 @@ struct CGRecordLowering { void lowerUnion(); void accumulateFields(); void accumulateBitFields(RecordDecl::field_iterator Field, - RecordDecl::field_iterator FieldEnd); - void computeVolatileBitfields(); + RecordDecl::field_iterator FieldEnd); void accumulateBases(); void accumulateVPtrs(); void accumulateVBases(); @@ -246,10 +237,6 @@ void CGRecordLowering::setBitFieldInfo( // least-significant-bit. if (DataLayout.isBigEndian()) Info.Offset = Info.StorageSize - (Info.Offset + Info.Size); - - Info.VolatileStorageSize = 0; - Info.VolatileOffset = 0; - Info.VolatileStorageOffset = CharUnits::Zero(); } void CGRecordLowering::lower(bool NVBaseType) { @@ -274,21 +261,15 @@ void CGRecordLowering::lower(bool NVBaseType) { // 8) Format the complete list of members in a way that can be consumed by // CodeGenTypes::ComputeRecordLayout. CharUnits Size = NVBaseType ? Layout.getNonVirtualSize() : Layout.getSize(); - if (D->isUnion()) { - lowerUnion(); - computeVolatileBitfields(); - return; - } + if (D->isUnion()) + return lowerUnion(); accumulateFields(); // RD implies C++. 
if (RD) { accumulateVPtrs(); accumulateBases(); - if (Members.empty()) { - appendPaddingBytes(Size); - computeVolatileBitfields(); - return; - } + if (Members.empty()) + return appendPaddingBytes(Size); if (!NVBaseType) accumulateVBases(); } @@ -300,7 +281,6 @@ void CGRecordLowering::lower(bool NVBaseType) { Members.pop_back(); calculateZeroInit(); fillOutputFields(); - computeVolatileBitfields(); } void CGRecordLowering::lowerUnion() { @@ -438,9 +418,9 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, if (OffsetInRecord < 8 || !llvm::isPowerOf2_64(OffsetInRecord) || !DataLayout.fitsInLegalInteger(OffsetInRecord)) return false; - // Make sure StartBitOffset is naturally aligned if it is treated as an + // Make sure StartBitOffset is natually aligned if it is treated as an // IType integer. - if (StartBitOffset % + if (StartBitOffset % Context.toBits(getAlignment(getIntNType(OffsetInRecord))) != 0) return false; @@ -523,123 +503,6 @@ void CGRecordLowering::accumulateBases() { } } -/// The AAPCS that defines that, when possible, bit-fields should -/// be accessed using containers of the declared type width: -/// When a volatile bit-field is read, and its container does not overlap with -/// any non-bit-field member or any zero length bit-field member, its container -/// must be read exactly once using the access width appropriate to the type of -/// the container. When a volatile bit-field is written, and its container does -/// not overlap with any non-bit-field member or any zero-length bit-field -/// member, its container must be read exactly once and written exactly once -/// using the access width appropriate to the type of the container. The two -/// accesses are not atomic. -/// -/// Enforcing the width restriction can be disabled using -/// -fno-aapcs-bitfield-width. -void CGRecordLowering::computeVolatileBitfields() { - if (!isAAPCS() || !Types.getCodeGenOpts().AAPCSBitfieldWidth) - return; - - for (auto &I : BitFields) { - const FieldDecl *Field = I.first; - CGBitFieldInfo &Info = I.second; - llvm::Type *ResLTy = Types.ConvertTypeForMem(Field->getType()); - // If the record alignment is less than the type width, we can't enforce a - // aligned load, bail out. - if ((uint64_t)(Context.toBits(Layout.getAlignment())) < - ResLTy->getPrimitiveSizeInBits()) - continue; - // CGRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets - // for big-endian targets, but it assumes a container of width - // Info.StorageSize. Since AAPCS uses a different container size (width - // of the type), we first undo that calculation here and redo it once - // the bit-field offset within the new container is calculated. - const unsigned OldOffset = - isBE() ? Info.StorageSize - (Info.Offset + Info.Size) : Info.Offset; - // Offset to the bit-field from the beginning of the struct. - const unsigned AbsoluteOffset = - Context.toBits(Info.StorageOffset) + OldOffset; - - // Container size is the width of the bit-field type. - const unsigned StorageSize = ResLTy->getPrimitiveSizeInBits(); - // Nothing to do if the access uses the desired - // container width and is naturally aligned. - if (Info.StorageSize == StorageSize && (OldOffset % StorageSize == 0)) - continue; - - // Offset within the container. - unsigned Offset = AbsoluteOffset & (StorageSize - 1); - // Bail out if an aligned load of the container cannot cover the entire - // bit-field. This can happen for example, if the bit-field is part of a - // packed struct. 
AAPCS does not define access rules for such cases, we let - // clang to follow its own rules. - if (Offset + Info.Size > StorageSize) - continue; - - // Re-adjust offsets for big-endian targets. - if (isBE()) - Offset = StorageSize - (Offset + Info.Size); - - const CharUnits StorageOffset = - Context.toCharUnitsFromBits(AbsoluteOffset & ~(StorageSize - 1)); - const CharUnits End = StorageOffset + - Context.toCharUnitsFromBits(StorageSize) - - CharUnits::One(); - - const ASTRecordLayout &Layout = - Context.getASTRecordLayout(Field->getParent()); - // If we access outside memory outside the record, than bail out. - const CharUnits RecordSize = Layout.getSize(); - if (End >= RecordSize) - continue; - - // Bail out if performing this load would access non-bit-fields members. - bool Conflict = false; - for (const auto *F : D->fields()) { - // Allow sized bit-fields overlaps. - if (F->isBitField() && !F->isZeroLengthBitField(Context)) - continue; - - const CharUnits FOffset = Context.toCharUnitsFromBits( - Layout.getFieldOffset(F->getFieldIndex())); - - // As C11 defines, a zero sized bit-field defines a barrier, so - // fields after and before it should be race condition free. - // The AAPCS acknowledges it and imposes no restritions when the - // natural container overlaps a zero-length bit-field. - if (F->isZeroLengthBitField(Context)) { - if (End > FOffset && StorageOffset < FOffset) { - Conflict = true; - break; - } - } - - const CharUnits FEnd = - FOffset + - Context.toCharUnitsFromBits( - Types.ConvertTypeForMem(F->getType())->getPrimitiveSizeInBits()) - - CharUnits::One(); - // If no overlap, continue. - if (End < FOffset || FEnd < StorageOffset) - continue; - - // The desired load overlaps a non-bit-field member, bail out. - Conflict = true; - break; - } - - if (Conflict) - continue; - // Write the new bit-field access parameters. - // As the storage offset now is defined as the number of elements from the - // start of the structure, we should divide the Offset by the element size. 
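    // A worked instance of the division below, with assumed numbers: for a
    // bit-field whose declared type is a 32-bit int, StorageSize == 32, so
    // Context.toCharUnitsFromBits(StorageSize).getQuantity() == 4. If the
    // naturally aligned container begins at byte 8 of the record, then
    // StorageOffset == 8 and the stored result is 8 / 4 == 2, i.e. the index
    // of the i32 element within the struct rather than a byte offset.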
- Info.VolatileStorageOffset = - StorageOffset / Context.toCharUnitsFromBits(StorageSize).getQuantity(); - Info.VolatileStorageSize = StorageSize; - Info.VolatileOffset = Offset; - } -} - void CGRecordLowering::accumulateVPtrs() { if (Layout.hasOwnVFPtr()) Members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::VFPtr, @@ -985,10 +848,8 @@ CodeGenTypes::ComputeRecordLayout(const RecordDecl *D, llvm::StructType *Ty) { assert(Info.StorageSize <= SL->getSizeInBits() && "Union not large enough for bitfield storage"); } else { - assert((Info.StorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy) || - Info.VolatileStorageSize == - getDataLayout().getTypeAllocSizeInBits(ElementTy)) && + assert(Info.StorageSize == + getDataLayout().getTypeAllocSizeInBits(ElementTy) && "Storage size does not match the element type size"); } assert(Info.Size > 0 && "Empty bitfield!"); @@ -1036,12 +897,11 @@ LLVM_DUMP_METHOD void CGRecordLayout::dump() const { void CGBitFieldInfo::print(raw_ostream &OS) const { OS << ""; + << " StorageOffset:" << StorageOffset.getQuantity() << ">"; } LLVM_DUMP_METHOD void CGBitFieldInfo::dump() const { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 1fbeb458a9d23f..fbccff11562c17 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1453,9 +1453,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); - Opts.AAPCSBitfieldWidth = Args.hasFlag(OPT_AAPCSBitfieldWidth, - OPT_ForceNoAAPCSBitfieldWidth, - true); return Success; } diff --git a/clang/test/CodeGen/aapcs-bitfield.c b/clang/test/CodeGen/aapcs-bitfield.c index 13db68d6ae81bd..4fc889bcf379ec 100644 --- a/clang/test/CodeGen/aapcs-bitfield.c +++ b/clang/test/CodeGen/aapcs-bitfield.c @@ -1,12 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=LE -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -fno-aapcs-bitfield-width | FileCheck %s -check-prefix=BE -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=LENUMLOADS -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load -fno-aapcs-bitfield-width | FileCheck %s -check-prefixes=BENUMLOADS -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LEWIDTH -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BEWIDTH -// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=LEWIDTHNUM -// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 -faapcs-bitfield-load | FileCheck %s -check-prefixes=BEWIDTHNUM +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=LE +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm -o - -O3 | FileCheck %s -check-prefix=BE +// RUN: %clang_cc1 -triple armv8-none-linux-eabi %s -emit-llvm -o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=LE,LENUMLOADS +// RUN: %clang_cc1 -triple armebv8-none-linux-eabi %s -emit-llvm 
-o - -O3 -fAAPCSBitfieldLoad | FileCheck %s -check-prefixes=BE,BENUMLOADS struct st0 { short c : 7; @@ -29,57 +25,6 @@ struct st0 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st0_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st0_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st0_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st0_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st0_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st0_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st0_check_load(struct st0 *m) { return m->c; } @@ -102,60 +47,6 @@ int st0_check_load(struct st0 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st0_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st0_check_store( -// BENUMLOADS-NEXT: 
entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st0_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st0_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st0_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st0_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST0:%.*]], %struct.st0* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st0_check_store(struct st0 *m) { m->c = 1; } @@ -182,57 +73,6 @@ struct st1 { // BE-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st1_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st1_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st1_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: 
[[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st1_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st1_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 10 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st1_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 10 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 10 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st1_check_load(struct st1 *m) { return m->c; } @@ -255,60 +95,6 @@ int st1_check_load(struct st1 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st1_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st1_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st1_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st1_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// 
LEWIDTHNUM-LABEL: @st1_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 1023 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1024 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st1_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST1:%.*]], %struct.st1* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -64 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void st1_check_store(struct st1 *m) { m->c = 1; } @@ -335,57 +121,6 @@ struct st2 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st2_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st2_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st2_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st2_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st2_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st2_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = 
load i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st2_check_load(struct st2 *m) { return m->c; } @@ -408,60 +143,6 @@ int st2_check_load(struct st2 *m) { // BE-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st2_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st2_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st2_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st2_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTH-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st2_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st2_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST2:%.*]], %struct.st2* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st2_check_store(struct st2 *m) { m->c = 1; } @@ -487,57 +168,6 @@ struct st3 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st3_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 
-// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 1 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st3_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st3_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st3_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st3_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 9 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st3_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 9 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st3_check_load(struct st3 *m) { return m->c; } @@ -560,60 +190,6 @@ int st3_check_load(struct st3 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st3_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -128 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st3_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST3:%.*]], %struct.st3* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP0]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st3_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: 
[[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st3_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// BEWIDTH-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st3_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -128 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st3_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st3* [[M:%.*]] to i16* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 511 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// BEWIDTHNUM-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st3_check_store(struct st3 *m) { m->c = 1; } @@ -645,68 +221,6 @@ struct st4 { // BE-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st4_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 2 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 -// LENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st4_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 9 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 11 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = zext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[SEXT:%.*]] = shl i32 [[BF_CAST]], 24 -// BENUMLOADS-NEXT: [[CONV:%.*]] = ashr exact i32 [[SEXT]], 24 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st4_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 
[[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st4_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st4_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 2 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st4_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st4_check_load(struct st4 *m) { return m->c; } @@ -729,64 +243,6 @@ int st4_check_load(struct st4 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st4_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -15873 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 512 -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st4_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -125 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 4 -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st4_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st4_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTH-NEXT: 
[[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st4_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -63 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 2 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st4_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st4* [[M:%.*]] to i8* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -125 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 4 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[TMP1]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st4_check_store(struct st4 *m) { m->c = 1; } @@ -809,60 +265,6 @@ void st4_check_store(struct st4 *m) { // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st4_check_nonv_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st4_check_nonv_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st4_check_nonv_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st4_check_nonv_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st4_check_nonv_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], 
%struct.st4* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -512 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st4_check_nonv_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST4:%.*]], %struct.st4* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 127 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void st4_check_nonv_store(struct st4 *m) { m->b = 1; } @@ -889,57 +291,6 @@ struct st5 { // BE-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[CONV]] // -// LENUMLOADS-LABEL: @st5_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[CONV]] -// -// BENUMLOADS-LABEL: @st5_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[CONV]] -// -// LEWIDTH-LABEL: @st5_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: ret i32 [[CONV]] -// -// BEWIDTH-LABEL: @st5_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: ret i32 [[CONV]] -// -// LEWIDTHNUM-LABEL: @st5_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[CONV]] -// -// BEWIDTHNUM-LABEL: @st5_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = 
ashr i8 [[BF_LOAD]], 3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[CONV]] -// int st5_check_load(struct st5 *m) { return m->c; } @@ -962,60 +313,6 @@ int st5_check_load(struct st5 *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st5_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st5_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st5_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st5_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st5_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st5_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST5:%.*]], %struct.st5* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 8 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[C]], align 2 -// BEWIDTHNUM-NEXT: ret void -// void st5_check_store(struct st5 *m) { m->c = 1; } @@ -1034,7 +331,7 @@ struct st6 { // LE-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 // LE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// 
LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -1052,7 +349,7 @@ struct st6 { // BE-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 @@ -1062,114 +359,6 @@ struct st6 { // BE-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] // BE-NEXT: ret i32 [[ADD4]] // -// LENUMLOADS-LABEL: @st6_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LENUMLOADS-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LENUMLOADS-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LENUMLOADS-NEXT: ret i32 [[ADD5]] -// -// BENUMLOADS-LABEL: @st6_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BENUMLOADS-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BENUMLOADS-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BENUMLOADS-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BENUMLOADS-NEXT: ret i32 [[ADD4]] -// -// LEWIDTH-LABEL: @st6_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// 
LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LEWIDTH-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LEWIDTH-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LEWIDTH-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LEWIDTH-NEXT: ret i32 [[ADD5]] -// -// BEWIDTH-LABEL: @st6_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BEWIDTH-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BEWIDTH-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BEWIDTH-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BEWIDTH-NEXT: ret i32 [[ADD4]] -// -// LEWIDTHNUM-LABEL: @st6_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 4 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i16 [[BF_SHL]], 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl i8 [[BF_LOAD1]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR3:%.*]] = ashr exact i8 [[BF_SHL2]], 3 -// LEWIDTHNUM-NEXT: [[BF_CAST4:%.*]] = sext i8 [[BF_ASHR3]] to i32 -// LEWIDTHNUM-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD]], [[BF_CAST4]] -// LEWIDTHNUM-NEXT: ret i32 [[ADD5]] -// -// BEWIDTHNUM-LABEL: @st6_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 
-// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_LOAD]], 4 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[B]], align 2, !tbaa !3 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[BF_CAST]], [[CONV]] -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR2:%.*]] = ashr i8 [[BF_LOAD1]], 3 -// BEWIDTHNUM-NEXT: [[BF_CAST3:%.*]] = sext i8 [[BF_ASHR2]] to i32 -// BEWIDTHNUM-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD]], [[BF_CAST3]] -// BEWIDTHNUM-NEXT: ret i32 [[ADD4]] -// int st6_check_load(volatile struct st6 *m) { int x = m->a; x += m->b; @@ -1185,7 +374,7 @@ int st6_check_load(volatile struct st6 *m) { // LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 // LE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// LE-NEXT: store i8 2, i8* [[B]], align 2 // LE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // LE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // LE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 @@ -1201,7 +390,7 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 // BE-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BE-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 +// BE-NEXT: store i8 2, i8* [[B]], align 2 // BE-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 // BE-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 // BE-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 @@ -1209,102 +398,6 @@ int st6_check_load(volatile struct st6 *m) { // BE-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st6_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// LENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st6_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load 
i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BENUMLOADS-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BENUMLOADS-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BENUMLOADS-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BENUMLOADS-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st6_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// LEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st6_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BEWIDTH-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTH-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BEWIDTH-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BEWIDTH-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st6_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -4096 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], -32 -// 
LEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 3 -// LEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st6_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST6:%.*]], %struct.st6* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], 15 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 16 -// BEWIDTHNUM-NEXT: store i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: store i8 2, i8* [[B]], align 2, !tbaa !3 -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST6]], %struct.st6* [[M]], i32 0, i32 2 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load i8, i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR2:%.*]] = and i8 [[BF_LOAD1]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET3:%.*]] = or i8 [[BF_CLEAR2]], 24 -// BEWIDTHNUM-NEXT: store i8 [[BF_SET3]], i8* [[C]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st6_check_store(struct st6 *m) { m->a = 1; m->b = 2; @@ -1325,10 +418,10 @@ struct st7b { // LE-LABEL: @st7_check_load( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// LE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 // LE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// LE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 // LE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // LE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -1342,10 +435,10 @@ struct st7b { // BE-LABEL: @st7_check_load( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 +// BE-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4 // BE-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 // BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 +// BE-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4 // BE-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 // BE-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 @@ -1355,105 +448,6 @@ struct st7b { // BE-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] // BE-NEXT: ret i32 [[ADD3]] // -// LENUMLOADS-LABEL: @st7_check_load( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 
[[TMP1]] to i32 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LENUMLOADS-NEXT: ret i32 [[ADD3]] -// -// BENUMLOADS-LABEL: @st7_check_load( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BENUMLOADS-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BENUMLOADS-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BENUMLOADS-NEXT: ret i32 [[ADD3]] -// -// LEWIDTH-LABEL: @st7_check_load( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LEWIDTH-NEXT: ret i32 [[ADD3]] -// -// BEWIDTH-LABEL: @st7_check_load( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BEWIDTH-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTH-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BEWIDTH-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: 
[[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTH-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BEWIDTH-NEXT: ret i32 [[ADD3]] -// -// LEWIDTHNUM-LABEL: @st7_check_load( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// LEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 3 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i8 [[BF_SHL]], 3 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// LEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// LEWIDTHNUM-NEXT: ret i32 [[ADD3]] -// -// BEWIDTHNUM-LABEL: @st7_check_load( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load i8, i8* [[X]], align 4, !tbaa !8 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 -// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = load volatile i8, i8* [[A]], align 4, !tbaa !11 -// BEWIDTHNUM-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], [[CONV]] -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_LOAD]], 3 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 -// BEWIDTHNUM-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[BF_CAST]] -// BEWIDTHNUM-NEXT: ret i32 [[ADD3]] -// int st7_check_load(struct st7b *m) { int r = m->x; r += m->y.a; @@ -1464,9 +458,9 @@ int st7_check_load(struct st7b *m) { // LE-LABEL: @st7_check_store( // LE-NEXT: entry: // LE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// LE-NEXT: store i8 1, i8* [[X]], align 4 // LE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// LE-NEXT: store volatile i8 2, i8* [[A]], align 4 // LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // LE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 @@ -1477,9 +471,9 @@ int st7_check_load(struct st7b *m) { // BE-LABEL: @st7_check_store( // BE-NEXT: entry: // BE-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 +// BE-NEXT: store i8 1, i8* [[X]], align 4 
// BE-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BE-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 +// BE-NEXT: store volatile i8 2, i8* [[A]], align 4 // BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 // BE-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 @@ -1487,84 +481,6 @@ int st7_check_load(struct st7b *m) { // BE-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @st7_check_store( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @st7_check_store( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BENUMLOADS-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BENUMLOADS-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BENUMLOADS-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @st7_check_store( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @st7_check_store( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BEWIDTH-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTH-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BEWIDTH-NEXT: [[B:%.*]] = 
getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BEWIDTH-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @st7_check_store( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// LEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// LEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -32 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 3 -// LEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @st7_check_store( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_ST7B:%.*]], %struct.st7b* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: store i8 1, i8* [[X]], align 4, !tbaa !8 -// BEWIDTHNUM-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 0 -// BEWIDTHNUM-NEXT: store volatile i8 2, i8* [[A]], align 4, !tbaa !11 -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST7B]], %struct.st7b* [[M]], i32 0, i32 2, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], 7 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], 24 -// BEWIDTHNUM-NEXT: store volatile i8 [[BF_SET]], i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void st7_check_store(struct st7b *m) { m->x = 1; m->y.a = 2; @@ -1588,42 +504,6 @@ struct st8 { // BE-NEXT: store i16 -1, i16* [[TMP0]], align 4 // BE-NEXT: ret i32 65535 // -// LENUMLOADS-LABEL: @st8_check_assignment( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret i32 65535 -// -// BENUMLOADS-LABEL: @st8_check_assignment( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret i32 65535 -// -// LEWIDTH-LABEL: @st8_check_assignment( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret i32 65535 -// -// BEWIDTH-LABEL: @st8_check_assignment( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret i32 65535 -// -// LEWIDTHNUM-LABEL: @st8_check_assignment( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 
4 -// LEWIDTHNUM-NEXT: ret i32 65535 -// -// BEWIDTHNUM-LABEL: @st8_check_assignment( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST8:%.*]], %struct.st8* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: store i16 -1, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret i32 65535 -// int st8_check_assignment(struct st8 *m) { return m->f = 0xffff; } @@ -1646,50 +526,6 @@ struct st9{ // BE-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 24 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr exact i32 [[BF_SHL]], 24 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_LOAD]], 24 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st9(volatile struct st9 *m) { return m->f; } @@ -1697,65 +533,17 @@ int read_st9(volatile struct st9 *m) { // LE-LABEL: @store_st9( // LE-NEXT: entry: // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // LE-NEXT: ret void // // BE-LABEL: @store_st9( // BE-NEXT: entry: // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load 
volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 1, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 1 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 16777216 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st9(volatile struct st9 *m) { m->f = 1; } @@ -1765,6 +553,7 @@ void store_st9(volatile struct st9 *m) { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // LE-NEXT: ret void // @@ -1773,75 +562,10 @@ void store_st9(volatile struct st9 *m) { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st9( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: 
[[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st9( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST9:%.*]], %struct.st9* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st9( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st9( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st9( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st9( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st9* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st9(volatile struct st9 *m) { ++m->f; } @@ -1869,56 +593,6 @@ struct st10{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st10( -// LENUMLOADS-NEXT: entry: -// 
LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 7 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i16 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i16 [[BF_SHL]], 8 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_ASHR]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st10( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 23 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 24 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st10(volatile struct st10 *m) { return m->f; } @@ -1941,60 +615,6 @@ int read_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st10( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -511 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 2 -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD]], -32641 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], 128 -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], 
i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st10( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -511 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 2 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -2139095041 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 8388608 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st10(volatile struct st10 *m) { m->f = 1; } @@ -2023,78 +643,6 @@ void store_st10(volatile struct st10 *m) { // BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st10( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 2 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 510 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -511 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st10( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST10:%.*]], %struct.st10* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i16 [[BF_LOAD]], 128 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i16 [[TMP1]], 32640 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD1]], -32641 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st10( -// LEWIDTH-NEXT: entry: -// 
LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st10( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st10( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 2 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 510 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -511 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st10( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st10* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 8388608 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 2139095040 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -2139095041 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st10(volatile struct st10 *m) { ++m->f; } @@ -2118,48 +666,6 @@ struct st11{ // BE-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 // BE-NEXT: ret i32 [[BF_CAST]] // -// LENUMLOADS-LABEL: @read_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// BENUMLOADS-LABEL: @read_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTH-LABEL: @read_st11( -// 
LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: ret i32 [[BF_CAST]] -// -// BEWIDTH-LABEL: @read_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: ret i32 [[BF_CAST]] -// -// LEWIDTHNUM-LABEL: @read_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] -// -// BEWIDTHNUM-LABEL: @read_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = sext i16 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: ret i32 [[BF_CAST]] -// int read_st11(volatile struct st11 *m) { return m->f; } @@ -2167,55 +673,17 @@ int read_st11(volatile struct st11 *m) { // LE-LABEL: @store_st11( // LE-NEXT: entry: // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 1, i16* [[F]], align 1 // LE-NEXT: ret void // // BE-LABEL: @store_st11( // BE-NEXT: entry: // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 +// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 1, i16* [[F]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @store_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// 
LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: store volatile i16 1, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void store_st11(volatile struct st11 *m) { m->f = 1; } @@ -2225,6 +693,7 @@ void store_st11(volatile struct st11 *m) { // LE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // LE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // LE-NEXT: ret void // @@ -2233,61 +702,10 @@ void store_st11(volatile struct st11 *m) { // BE-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 // BE-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// LENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// BENUMLOADS-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// 
LEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i16 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: store volatile i16 [[INC]], i16* [[F]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_st11(volatile struct st11 *m) { ++m->f; } @@ -2295,67 +713,19 @@ void increment_st11(volatile struct st11 *m) { // LE-LABEL: @increment_e_st11( // LE-NEXT: entry: // LE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// LE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 // LE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// LE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 // LE-NEXT: ret void // // BE-LABEL: @increment_e_st11( // BE-NEXT: entry: // BE-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 +// BE-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4 // BE-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 +// BE-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_e_st11( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_e_st11( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_e_st11( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_e_st11( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BEWIDTH-NEXT: ret void -// -// 
LEWIDTHNUM-LABEL: @increment_e_st11( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_e_st11( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT_ST11:%.*]], %struct.st11* [[M:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = load volatile i8, i8* [[E]], align 4, !tbaa !12 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[TMP0]], 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[E]], align 4, !tbaa !12 -// BEWIDTHNUM-NEXT: ret void -// void increment_e_st11(volatile struct st11 *m) { ++m->e; } @@ -2381,54 +751,6 @@ struct st12{ // BE-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 // BE-NEXT: ret i32 [[BF_ASHR]] // -// LENUMLOADS-LABEL: @read_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LENUMLOADS-NEXT: ret i32 [[BF_ASHR]] -// -// BENUMLOADS-LABEL: @read_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BENUMLOADS-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTH-LABEL: @read_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTH-LABEL: @read_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BEWIDTH-NEXT: ret i32 [[BF_ASHR]] -// -// LEWIDTHNUM-LABEL: @read_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// LEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// LEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// -// BEWIDTHNUM-LABEL: @read_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl i32 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[BF_ASHR:%.*]] = ashr i32 [[BF_SHL]], 16 -// BEWIDTHNUM-NEXT: ret i32 [[BF_ASHR]] -// int read_st12(volatile struct st12 *m) { return m->f; } @@ -2451,60 +773,6 @@ int read_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// 
LENUMLOADS-LABEL: @store_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @store_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @store_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @store_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @store_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @store_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD]], -16776961 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], 256 -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void store_st12(volatile struct st12 *m) { m->f = 1; } @@ -2533,78 +801,6 @@ void store_st12(volatile struct st12 *m) { // BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LENUMLOADS-NEXT: store volatile i32 
[[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC3:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = and i32 [[INC3]], 16776960 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16776961 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_st12(volatile struct st12 *m) { ++m->f; } @@ -2633,78 +829,6 @@ void increment_st12(volatile struct st12 *m) { 
// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_e_st12( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_e_st12( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_e_st12( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_e_st12( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_e_st12( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st12* [[M:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 255 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -256 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_e_st12( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast 
%struct.st12* [[M:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 16777216 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -16777216 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 16777215 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_e_st12(volatile struct st12 *m) { ++m->e; } @@ -2742,90 +866,6 @@ struct st13 { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_b_st13( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_st13( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_st13( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_st13( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: 
[[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_st13( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_st13( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st13* [[S:%.*]] to i40* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_b_st13(volatile struct st13 *s) { s->b++; } @@ -2839,6 +879,7 @@ struct st14 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -2847,61 +888,10 @@ struct st14 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st14( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st14( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// 
BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st14( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st14( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st14( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st14( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST14:%.*]], %struct.st14* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st14(volatile struct st14 *s) { s->a++; } @@ -2915,6 +905,7 @@ struct st15 { // LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // LE-NEXT: ret void // @@ -2923,61 +914,10 @@ struct st15 { // BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 // BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 +// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 // BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st15( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st15( -// 
BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st15( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st15( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st15( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st15( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ST15:%.*]], %struct.st15* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st15(volatile struct st15 *s) { s->a++; } @@ -3015,84 +955,6 @@ struct st16 { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_a_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: 
[[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_a_st16(struct st16 *s) { s->a++; } @@ -3125,90 +987,6 @@ void increment_a_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_b_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = 
trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// 
BEWIDTHNUM-LABEL: @increment_b_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_b_st16(struct st16 *s) { s->b++; } @@ -3241,90 +1019,6 @@ void increment_b_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_c_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_c_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_c_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_c_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// 
BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_c_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_c_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], 4294967295 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_c_st16(struct st16 *s) { s->c++; } @@ -3359,96 +1053,6 @@ void increment_c_st16(struct st16 *s) { // BE-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_d_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_d_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BENUMLOADS-NEXT: 
[[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_d_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTH-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTH-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTH-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_d_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BEWIDTH-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTH-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTH-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTH-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_d_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LEWIDTHNUM-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -281470681743361 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_d_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load i64, i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// 
BEWIDTHNUM-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BEWIDTHNUM-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD]], -4294901761 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BEWIDTHNUM-NEXT: store i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_d_st16(struct st16 *s) { s->d++; } @@ -3481,68 +1085,6 @@ void increment_d_st16(struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_a_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_a_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_a_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_a_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_a_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_a_st16( -// BEWIDTHNUM-NEXT: entry: -// 
BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_a_st16(volatile struct st16 *s) { s->a++; } @@ -3577,88 +1119,6 @@ void increment_v_a_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_b_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_b_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_b_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_b_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 
-// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_b_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_b_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_b_st16(volatile struct st16 *s) { s->b++; } @@ -3693,74 +1153,6 @@ void increment_v_b_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_c_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i64 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_c_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[C]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i64 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i64 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* 
[[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i64 [[TMP2]], 32 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], 4294967295 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_c_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_c_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_c_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_c_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast i48* [[TMP0]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: store volatile i32 [[INC]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_c_st16(volatile struct st16 *s) { s->c++; } @@ -3797,90 +1189,6 @@ void increment_v_c_st16(volatile struct st16 *s) { // BE-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_d_st16( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i64 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[TMP2]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP3:%.*]] = and i32 [[INC]], 65535 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = zext i32 [[TMP3]] to i64 -// LENUMLOADS-NEXT: [[BF_SHL2:%.*]] = shl nuw nsw i64 [[BF_VALUE]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -281470681743361 
-// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_SHL2]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_d_st16( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT_ST16:%.*]], %struct.st16* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i48* [[D]] to i64* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i64, i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = trunc i64 [[BF_LOAD]] to i32 -// BENUMLOADS-NEXT: [[INC4:%.*]] = add i32 [[TMP1]], 65536 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = and i32 [[INC4]], -65536 -// BENUMLOADS-NEXT: [[BF_SHL2:%.*]] = zext i32 [[TMP2]] to i64 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i64 [[BF_LOAD1]], -4294901761 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i64 [[BF_CLEAR]], [[BF_SHL2]] -// BENUMLOADS-NEXT: store volatile i64 [[BF_SET]], i64* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_d_st16( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_d_st16( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTH-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_d_st16( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 3 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 65535 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -65536 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_d_st16( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st16* [[S:%.*]] to i32* -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 
3 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = add i32 [[BF_LOAD]], 65536 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP2]], -65536 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 65535 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP1]], align 4 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_d_st16(volatile struct st16 *s) { s->d++; } @@ -3919,90 +1227,6 @@ char c : 8; // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_b_st17( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_b_st17( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BENUMLOADS-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_b_st17( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTH-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_b_st17( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BEWIDTH-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTH-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load 
volatile i40, i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTH-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_b_st17( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i32 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = zext i32 [[INC]] to i40 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -4294967296 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// LEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_b_st17( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i32 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add nsw i32 [[BF_CAST]], 1 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = zext i32 [[INC]] to i40 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 8 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 255 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// BEWIDTHNUM-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_b_st17(volatile struct st17 *s) { s->b++; } @@ -4035,458 +1259,6 @@ void increment_v_b_st17(volatile struct st17 *s) { // BE-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 // BE-NEXT: ret void // -// LENUMLOADS-LABEL: @increment_v_c_st17( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i40 [[BF_LOAD]], 32 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[TMP1]] to i8 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[INC]] to i40 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: [[BF_SHL:%.*]] = shl nuw i40 [[TMP2]], 32 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], 4294967295 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_SHL]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_v_c_st17( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast %struct.st17* [[S:%.*]] to i40* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i40 [[BF_LOAD]] to i8 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_CAST]], 1 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = zext i8 [[INC]] to i40 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i40, i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: 
[[BF_CLEAR:%.*]] = and i40 [[BF_LOAD1]], -256 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i40 [[BF_CLEAR]], [[TMP1]] -// BENUMLOADS-NEXT: store volatile i40 [[BF_SET]], i40* [[TMP0]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_v_c_st17( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_v_c_st17( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_v_c_st17( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_v_c_st17( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST17:%.*]], %struct.st17* [[S:%.*]], i32 0, i32 0, i32 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 1 -// BEWIDTHNUM-NEXT: ret void -// void increment_v_c_st17(volatile struct st17 *s) { s->c++; } - -// A zero bitfield should block, as the C11 specification -// requires a and b to be different memory positions -struct zero_bitfield { - int a : 8; - char : 0; - int b : 8; -}; - -// LE-LABEL: @increment_a_zero_bitfield( -// LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_a_zero_bitfield( -// BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_a_zero_bitfield( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: store 
volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_zero_bitfield( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_zero_bitfield( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_zero_bitfield( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// -void increment_a_zero_bitfield(volatile struct zero_bitfield *s) { - s->a++; -} - -// LE-LABEL: @increment_b_zero_bitfield( -// LE-NEXT: entry: -// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_b_zero_bitfield( -// BE-NEXT: entry: -// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BE-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BE-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_b_zero_bitfield( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* 
[[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// LENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_zero_bitfield( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// BENUMLOADS-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_zero_bitfield( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_b_zero_bitfield( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTH-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTH-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD:%.*]], %struct.zero_bitfield* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: [[INC:%.*]] = add i8 [[BF_LOAD]], 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[INC]], i8* [[B]], align 1 -// BEWIDTHNUM-NEXT: ret void -// -void increment_b_zero_bitfield(volatile struct zero_bitfield *s) { - s->b++; -} - -// The zero bitfield here does not affect -struct zero_bitfield_ok { - short a : 8; - char a1 : 8; - long : 0; - int b : 24; -}; - -// LE-LABEL: @increment_a_zero_bitfield_ok( -// LE-NEXT: entry: -// LE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 -// LE-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 -// LE-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] -// LE-NEXT: [[TMP2:%.*]] = zext 
i8 [[ADD]] to i16 -// LE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LE-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 -// LE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 -// LE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] -// LE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_a_zero_bitfield_ok( -// BE-NEXT: entry: -// BE-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BE-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 -// BE-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] -// BE-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// BE-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BE-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 -// BE-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] -// BE-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD1]], 8 -// LENUMLOADS-NEXT: [[BF_CAST:%.*]] = trunc i16 [[TMP1]] to i8 -// LENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[BF_CAST]], [[CONV]] -// LENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// LENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_SHL6:%.*]] = shl nuw i16 [[TMP2]], 8 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], 255 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_SHL6]], [[BF_CLEAR]] -// LENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_a_zero_bitfield_ok( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BENUMLOADS-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[SEXT:%.*]] = trunc i16 [[BF_LOAD1]] to i8 -// BENUMLOADS-NEXT: [[ADD:%.*]] = add i8 [[SEXT]], [[CONV]] -// BENUMLOADS-NEXT: [[TMP2:%.*]] = zext i8 [[ADD]] to i16 -// BENUMLOADS-NEXT: [[BF_LOAD5:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i16 [[BF_LOAD5]], -256 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i16 [[BF_CLEAR]], [[TMP2]] -// BENUMLOADS-NEXT: store volatile i16 [[BF_SET]], i16* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_a_zero_bitfield_ok( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// 
LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LEWIDTH-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// LEWIDTH-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// LEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: @increment_a_zero_bitfield_ok( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BEWIDTH-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BEWIDTH-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// BEWIDTH-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTH-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// BEWIDTH-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[BF_LOAD]] to i8 -// LEWIDTHNUM-NEXT: [[TMP1:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// LEWIDTHNUM-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// LEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP2]], align 1 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_a_zero_bitfield_ok( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 0 -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i16, i16* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = lshr i16 [[BF_LOAD]], 8 -// BEWIDTHNUM-NEXT: [[CONV:%.*]] = trunc i16 [[TMP1]] to i8 -// BEWIDTHNUM-NEXT: [[TMP2:%.*]] = bitcast %struct.zero_bitfield_ok* [[S]] to i8* -// BEWIDTHNUM-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 1 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: [[ADD:%.*]] = add i8 [[BF_LOAD1]], [[CONV]] -// BEWIDTHNUM-NEXT: [[BF_LOAD4:%.*]] = load volatile i8, i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: store volatile i8 [[ADD]], i8* [[TMP3]], align 1 -// BEWIDTHNUM-NEXT: ret void -// -void increment_a_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { - s->a1 += s->a; -} - -// LE-LABEL: @increment_b_zero_bitfield_ok( -// LE-NEXT: entry: -// LE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LE-NEXT: [[INC:%.*]] = add i32 
[[BF_LOAD]], 1 -// LE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LE-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LE-NEXT: ret void -// -// BE-LABEL: @increment_b_zero_bitfield_ok( -// BE-NEXT: entry: -// BE-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BE-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BE-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BE-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BE-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BE-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BE-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BE-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BE-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BE-NEXT: ret void -// -// LENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( -// LENUMLOADS-NEXT: entry: -// LENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LENUMLOADS-NEXT: ret void -// -// BENUMLOADS-LABEL: @increment_b_zero_bitfield_ok( -// BENUMLOADS-NEXT: entry: -// BENUMLOADS-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BENUMLOADS-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BENUMLOADS-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BENUMLOADS-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BENUMLOADS-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BENUMLOADS-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BENUMLOADS-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BENUMLOADS-NEXT: ret void -// -// LEWIDTH-LABEL: @increment_b_zero_bitfield_ok( -// LEWIDTH-NEXT: entry: -// LEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTH-NEXT: ret void -// -// BEWIDTH-LABEL: 
@increment_b_zero_bitfield_ok( -// BEWIDTH-NEXT: entry: -// BEWIDTH-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BEWIDTH-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BEWIDTH-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTH-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BEWIDTH-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BEWIDTH-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTH-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTH-NEXT: ret void -// -// LEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( -// LEWIDTHNUM-NEXT: entry: -// LEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// LEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// LEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[INC:%.*]] = add i32 [[BF_LOAD]], 1 -// LEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: [[BF_VALUE:%.*]] = and i32 [[INC]], 16777215 -// LEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], -16777216 -// LEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_VALUE]] -// LEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// LEWIDTHNUM-NEXT: ret void -// -// BEWIDTHNUM-LABEL: @increment_b_zero_bitfield_ok( -// BEWIDTHNUM-NEXT: entry: -// BEWIDTHNUM-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ZERO_BITFIELD_OK:%.*]], %struct.zero_bitfield_ok* [[S:%.*]], i32 0, i32 1 -// BEWIDTHNUM-NEXT: [[TMP0:%.*]] = bitcast i24* [[B]] to i32* -// BEWIDTHNUM-NEXT: [[BF_LOAD:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[BF_LOAD1:%.*]] = load volatile i32, i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: [[TMP1:%.*]] = add i32 [[BF_LOAD]], 256 -// BEWIDTHNUM-NEXT: [[BF_SHL:%.*]] = and i32 [[TMP1]], -256 -// BEWIDTHNUM-NEXT: [[BF_CLEAR:%.*]] = and i32 [[BF_LOAD1]], 255 -// BEWIDTHNUM-NEXT: [[BF_SET:%.*]] = or i32 [[BF_CLEAR]], [[BF_SHL]] -// BEWIDTHNUM-NEXT: store volatile i32 [[BF_SET]], i32* [[TMP0]], align 4 -// BEWIDTHNUM-NEXT: ret void -// -void increment_b_zero_bitfield_ok(volatile struct zero_bitfield_ok *s) { - s->b++; -} diff --git a/clang/test/CodeGen/bitfield-2.c b/clang/test/CodeGen/bitfield-2.c index 661d42683bc276..9d669575ecd117 100644 --- a/clang/test/CodeGen/bitfield-2.c +++ b/clang/test/CodeGen/bitfield-2.c @@ -14,7 +14,7 @@ // CHECK-RECORD: LLVMType:%struct.s0 = type { [3 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: struct __attribute((packed)) s0 { int f0 : 24; }; @@ -54,8 +54,8 @@ unsigned long long test_0() { // CHECK-RECORD: LLVMType:%struct.s1 = type { [3 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: +// CHECK-RECORD: #pragma pack(push) #pragma pack(1) @@ -102,7 +102,7 @@ unsigned long long test_1() { // CHECK-RECORD: LLVMType:%union.u2 = type { i8 } // CHECK-RECORD: IsZeroInitializable:1 // CHECK-RECORD: BitFields:[ -// CHECK-RECORD: union __attribute__((packed)) u2 { unsigned long long f0 : 3; @@ -274,8 +274,8 @@ _Bool test_6() { // CHECK-RECORD: LLVMType:%struct.s7 = type { i32, i32, i32, i8, i32, [12 x i8] } // CHECK-RECORD: IsZeroInitializable:1 // 
CHECK-RECORD: BitFields:[ -// CHECK-RECORD: +// CHECK-RECORD: struct __attribute__((aligned(16))) s7 { int a, b, c; From 1f870bd9284ad55dff96ab6f99afd92fd5f294be Mon Sep 17 00:00:00 2001 From: "Paul C. Anagnostopoulos" Date: Wed, 2 Sep 2020 11:50:30 -0400 Subject: [PATCH 100/161] Add detailed reference for the SearchableTables backend. --- llvm/docs/TableGen/BackEnds.rst | 381 +++++++++++++++++++++++++++++++- 1 file changed, 377 insertions(+), 4 deletions(-) diff --git a/llvm/docs/TableGen/BackEnds.rst b/llvm/docs/TableGen/BackEnds.rst index 8b313383566894..a93f2ace78808e 100644 --- a/llvm/docs/TableGen/BackEnds.rst +++ b/llvm/docs/TableGen/BackEnds.rst @@ -226,16 +226,14 @@ SearchableTables **Purpose**: Generate custom searchable tables. -**Output**: Enums, global tables and lookup helper functions. +**Output**: Enums, global tables, and lookup helper functions. **Usage**: This backend allows generating free-form, target-specific tables from TableGen records. The ARM and AArch64 targets use this backend to generate tables of system registers; the AMDGPU target uses it to generate meta-data about complex image and memory buffer instructions. -More documentation is available in ``include/llvm/TableGen/SearchableTable.td``, -which also contains the definitions of TableGen classes which must be -instantiated in order to define the enums and tables emitted by this backend. +See `SearchableTables Reference`_ for a detailed description. CTags ----- @@ -438,6 +436,381 @@ used for documenting user-facing attributes. General BackEnds ================ +SearchableTables Reference +-------------------------- + +A TableGen include file, ``SearchableTable.td``, provides classes for +generating C++ searchable tables. These tables are described in the +following sections. To generate the C++ code, run ``llvm-tblgen`` with the +``--gen-searchable-tables`` option, which invokes the backend that generates +the tables from the records you provide. + +Each of the data structures generated for searchable tables is guarded by an +``#ifdef``. This allows you to include the generated ``.inc`` file and select only +certain data structures for inclusion. The examples below show the macro +names used in these guards. + +Generic Enumerated Types +~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``GenericEnum`` class makes it easy to define a C++ enumerated type and +the enumerated *elements* of that type. To define the type, define a record +whose parent class is ``GenericEnum`` and whose name is the desired enum +type. This class provides three fields, which you can set in the record +using the ``let`` statement. + +* ``string FilterClass``. The enum type will have one element for each record + that derives from this class. These records are collected to assemble the + complete set of elements. + +* ``string NameField``. The name of a field *in the collected records* that specifies + the name of the element. If a record has no such field, the record's + name will be used. + +* ``string ValueField``. The name of a field *in the collected records* that + specifies the numerical value of the element. If a record has no such + field, it will be assigned an integer value. Values are assigned in + alphabetical order starting with 0. + +Here is an example where the values of the elements are specified +explicitly, as a template argument to the ``BEntry`` class. The resulting +C++ code is shown. + +.. 
code-block:: text + + def BValues : GenericEnum { + let FilterClass = "BEntry"; + let NameField = "Name"; + let ValueField = "Encoding"; + } + + class BEntry<bits<16> enc> { + string Name = NAME; + bits<16> Encoding = enc; + } + + def BFoo : BEntry<0xac>; + def BBar : BEntry<0x14>; + def BZoo : BEntry<0x80>; + def BSnork : BEntry<0x4c>; + +.. code-block:: text + + #ifdef GET_BValues_DECL + enum BValues { + BBar = 20, + BFoo = 172, + BSnork = 76, + BZoo = 128, + }; + #endif + +In the following example, the values of the elements are assigned +automatically. Note that values are assigned from 0, in alphabetical order +by element name. + +.. code-block:: text + + def CEnum : GenericEnum { + let FilterClass = "CEnum"; + } + + class CEnum; + + def CFoo : CEnum; + def CBar : CEnum; + def CBaz : CEnum; + +.. code-block:: text + + #ifdef GET_CEnum_DECL + enum CEnum { + CBar = 0, + CBaz = 1, + CFoo = 2, + }; + #endif + + +Generic Tables +~~~~~~~~~~~~~~ + +The ``GenericTable`` class is used to define a searchable generic table. +TableGen produces C++ code to define the table entries and also produces +the declaration and definition of a function to search the table based on a +primary key. To define the table, define a record whose parent class is +``GenericTable`` and whose name is the name of the global table of entries. +This class provides six fields. + +* ``string FilterClass``. The table will have one entry for each record + that derives from this class. + +* ``string CppTypeName``. The name of the C++ struct/class type of the + table that holds the entries. If unspecified, the ``FilterClass`` name is + used. + +* ``list<string> Fields``. A list of the names of the fields in the + collected records that contain the data for the table entries. The order of + this list determines the order of the values in the C++ initializers. See + below for information about the types of these fields. + +* ``list<string> PrimaryKey``. The list of fields that make up the + primary key. + +* ``string PrimaryKeyName``. The name of the generated C++ function + that performs a lookup on the primary key. + +* ``bit PrimaryKeyEarlyOut``. See the third example below. + +TableGen attempts to deduce the type of each of the table fields. It can +deduce ``bit``, ``bits<n>``, ``string``, ``Intrinsic``, and ``Instruction``. +These can be used in the primary key. TableGen also deduces ``code``, but it +cannot be used in the primary key. Any other field types must be specified +explicitly; this is done as shown in the second example below. Such fields +cannot be used in the primary key. + +Here is an example where TableGen can deduce the field types. Note that the +table entry records are anonymous; the names of entry records are +irrelevant. + +.. code-block:: text + + def ATable : GenericTable { + let FilterClass = "AEntry"; + let Fields = ["Str", "Val1", "Val2"]; + let PrimaryKey = ["Val1", "Val2"]; + let PrimaryKeyName = "lookupATableByValues"; + } + + class AEntry<string str, int val1, int val2> { + string Str = str; + bits<8> Val1 = val1; + bits<10> Val2 = val2; + } + + def : AEntry<"Bob", 5, 3>; + def : AEntry<"Carol", 2, 6>; + def : AEntry<"Ted", 4, 4>; + def : AEntry<"Alice", 4, 5>; + def : AEntry<"Costa", 2, 1>; + +Here is the generated C++ code. The declaration of ``lookupATableByValues`` +is guarded by ``GET_ATable_DECL``, while the definitions are guarded by +``GET_ATable_IMPL``. + +..
code-block:: text + + #ifdef GET_ATable_DECL + const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2); + #endif + + #ifdef GET_ATable_IMPL + constexpr AEntry ATable[] = { + { "Costa", 0x2, 0x1 }, // 0 + { "Carol", 0x2, 0x6 }, // 1 + { "Ted", 0x4, 0x4 }, // 2 + { "Alice", 0x4, 0x5 }, // 3 + { "Bob", 0x5, 0x3 }, // 4 + }; + + const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) { + struct KeyType { + uint8_t Val1; + uint16_t Val2; + }; + KeyType Key = { Val1, Val2 }; + auto Table = makeArrayRef(ATable); + auto Idx = std::lower_bound(Table.begin(), Table.end(), Key, + [](const AEntry &LHS, const KeyType &RHS) { + if (LHS.Val1 < RHS.Val1) + return true; + if (LHS.Val1 > RHS.Val1) + return false; + if (LHS.Val2 < RHS.Val2) + return true; + if (LHS.Val2 > RHS.Val2) + return false; + return false; + }); + + if (Idx == Table.end() || + Key.Val1 != Idx->Val1 || + Key.Val2 != Idx->Val2) + return nullptr; + return &*Idx; + } + #endif + +The table entries in ``ATable`` are sorted in order by ``Val1``, and within +each of those values, by ``Val2``. This allows a binary search of the table, +which is performed in the lookup function by ``std::lower_bound``. The +lookup function returns a reference to the found table entry, or the null +pointer if no entry is found. + +This example includes a field whose type TableGen cannot deduce. The ``Kind`` +field uses the enumerated type ``CEnum`` defined above. To inform TableGen +of the type, the class derived from ``GenericTable`` must include a field +named ``TypeOf_``\ *field*, where *field* is the name of the field whose type +is required. + +.. code-block:: text + + def CTable : GenericTable { + let FilterClass = "CEntry"; + let Fields = ["Name", "Kind", "Encoding"]; + GenericEnum TypeOf_Kind = CEnum; + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupCEntryByEncoding"; + } + + class CEntry<string name, CEnum kind, int enc> { + string Name = name; + CEnum Kind = kind; + bits<16> Encoding = enc; + } + + def : CEntry<"Apple", CFoo, 10>; + def : CEntry<"Pear", CBaz, 15>; + def : CEntry<"Apple", CBar, 13>; + +Here is the generated C++ code. + +.. code-block:: text + + #ifdef GET_CTable_DECL + const CEntry *lookupCEntryByEncoding(uint16_t Encoding); + #endif + + #ifdef GET_CTable_IMPL + constexpr CEntry CTable[] = { + { "Apple", CFoo, 0xA }, // 0 + { "Apple", CBar, 0xD }, // 1 + { "Pear", CBaz, 0xF }, // 2 + }; + + const CEntry *lookupCEntryByEncoding(uint16_t Encoding) { + struct KeyType { + uint16_t Encoding; + }; + KeyType Key = { Encoding }; + auto Table = makeArrayRef(CTable); + auto Idx = std::lower_bound(Table.begin(), Table.end(), Key, + [](const CEntry &LHS, const KeyType &RHS) { + if (LHS.Encoding < RHS.Encoding) + return true; + if (LHS.Encoding > RHS.Encoding) + return false; + return false; + }); + + if (Idx == Table.end() || + Key.Encoding != Idx->Encoding) + return nullptr; + return &*Idx; + } + +The ``PrimaryKeyEarlyOut`` field, when set to 1, modifies the lookup +function so that it tests the first field of the primary key to determine +whether it is within the range of the collected records' primary keys. If +not, the function returns the null pointer without performing the binary +search. This is useful for tables that provide data for only some of the +elements of a larger enum-based space. The first field of the primary key +must be an integral type; it cannot be a string. + +Adding ``let PrimaryKeyEarlyOut = 1`` to the ``ATable`` above: + +..
code-block:: text + + def ATable : GenericTable { + let FilterClass = "AEntry"; + let Fields = ["Str", "Val1", "Val2"]; + let PrimaryKey = ["Val1", "Val2"]; + let PrimaryKeyName = "lookupATableByValues"; + let PrimaryKeyEarlyOut = 1; + } + +causes the lookup function to change as follows: + +.. code-block:: text + + const AEntry *lookupATableByValues(uint8_t Val1, uint16_t Val2) { + if ((Val1 < 0x2) || + (Val1 > 0x5)) + return nullptr; + + struct KeyType { + ... + +Search Indexes +~~~~~~~~~~~~~~ + +The ``SearchIndex`` class is used to define additional lookup functions for +generic tables. To define an additional function, define a record whose parent +class is ``SearchIndex`` and whose name is the name of the desired lookup +function. This class provides three fields. + +* ``GenericTable Table``. The name of the table that is to receive another + lookup function. + +* ``list<string> Key``. The list of fields that make up the secondary key. + +* ``bit EarlyOut``. See the third example in `Generic Tables`_. + +Here is an example of a secondary key added to the ``CTable`` above. The +generated function looks up entries based on the ``Name`` and ``Kind`` fields. + +.. code-block:: text + + def lookupCEntry : SearchIndex { + let Table = CTable; + let Key = ["Name", "Kind"]; + } + +This use of ``SearchIndex`` generates the following additional C++ code. + +.. code-block:: text + + const CEntry *lookupCEntry(StringRef Name, unsigned Kind); + + ... + + const CEntry *lookupCEntryByName(StringRef Name, unsigned Kind) { + struct IndexType { + const char * Name; + unsigned Kind; + unsigned _index; + }; + static const struct IndexType Index[] = { + { "APPLE", CBar, 1 }, + { "APPLE", CFoo, 0 }, + { "PEAR", CBaz, 2 }, + }; + + struct KeyType { + std::string Name; + unsigned Kind; + }; + KeyType Key = { Name.upper(), Kind }; + auto Table = makeArrayRef(Index); + auto Idx = std::lower_bound(Table.begin(), Table.end(), Key, + [](const IndexType &LHS, const KeyType &RHS) { + int CmpName = StringRef(LHS.Name).compare(RHS.Name); + if (CmpName < 0) return true; + if (CmpName > 0) return false; + if ((unsigned)LHS.Kind < (unsigned)RHS.Kind) + return true; + if ((unsigned)LHS.Kind > (unsigned)RHS.Kind) + return false; + return false; + }); + + if (Idx == Table.end() || + Key.Name != Idx->Name || + Key.Kind != Idx->Kind) + return nullptr; + return &CTable[Idx->_index]; + } + JSON ---- From e6bb4c8e7b3e27f214c9665763a2dd09aa96a5ac Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Sep 2020 10:49:32 -0700 Subject: [PATCH 101/161] [X86] SSE4_A should only imply SSE3 not SSSE3 in the frontend. SSE4_1 and SSE4_2 do imply SSSE3. So I guess I got confused when switching the code to being table based in D83273.
Fixes PR47464 --- clang/test/Preprocessor/predefined-arch-macros.c | 2 ++ llvm/lib/Support/X86TargetParser.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index 5326596fee93c5..3c369ace32d51f 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2525,6 +2525,7 @@ // CHECK_AMDFAM10_M32: #define __SSE4A__ 1 // CHECK_AMDFAM10_M32: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M32: #define __SSE__ 1 +// CHECK_AMDFAM10_M32-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M32: #define __amdfam10 1 // CHECK_AMDFAM10_M32: #define __amdfam10__ 1 // CHECK_AMDFAM10_M32: #define __i386 1 @@ -2547,6 +2548,7 @@ // CHECK_AMDFAM10_M64: #define __SSE4A__ 1 // CHECK_AMDFAM10_M64: #define __SSE_MATH__ 1 // CHECK_AMDFAM10_M64: #define __SSE__ 1 +// CHECK_AMDFAM10_M64-NOT: #define __SSSE3__ 1 // CHECK_AMDFAM10_M64: #define __amd64 1 // CHECK_AMDFAM10_M64: #define __amd64__ 1 // CHECK_AMDFAM10_M64: #define __amdfam10 1 diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp index a5af98582452b3..b7d9bd4f865c90 100644 --- a/llvm/lib/Support/X86TargetParser.cpp +++ b/llvm/lib/Support/X86TargetParser.cpp @@ -529,7 +529,7 @@ static constexpr FeatureBitset ImpliedFeaturesAVX5124FMAPS = {}; static constexpr FeatureBitset ImpliedFeaturesAVX5124VNNIW = {}; // SSE4_A->FMA4->XOP chain. -static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSSE3; +static constexpr FeatureBitset ImpliedFeaturesSSE4_A = FeatureSSE3; static constexpr FeatureBitset ImpliedFeaturesFMA4 = FeatureAVX | FeatureSSE4_A; static constexpr FeatureBitset ImpliedFeaturesXOP = FeatureFMA4; From 59a467ee4faeee5b569960e53a76a0311d050d18 Mon Sep 17 00:00:00 2001 From: Xun Li Date: Tue, 8 Sep 2020 10:58:35 -0700 Subject: [PATCH 102/161] [Coroutine] Make dealing with alloca spills more robust D66230 attempted to fix a problem where there are allocas used before CoroBegin. It keeps allocas and their uses in place if there are no escapes/changes to the data before CoroBegin. Unfortunately that's incorrect. Consider this code: %var = alloca i32 %1 = getelementptr .. %var; stays put %f = call i8* @llvm.coro.begin store ... %1 After this fix, %1 will now stay put; however, if a store happens after coro.begin and hence modifies the content, this change will not be reflected in the coroutine frame (and will eventually be DCEed). To generalize the problem, if any alias ptr is created before coro.begin for an Alloca and that alias ptr is later written into after coro.begin, it will lead to incorrect behavior. There are also a few other minor issues, such as an incorrect dominance condition check in the ptr visitor, unhandled memory intrinsics, etc. This patch attempts to fix some of these issues, and makes the handling of aliases more robust. While visiting the uses of the alloca pointer, we also keep track of all aliases created that will be used after CoroBegin. We track the offset of each alias, and then recreate these aliases after CoroBegin using these offsets. It's worth noting that this is not perfect and there will still be cases we cannot handle. I think it's impractical to handle all cases given the current design. This patch makes it more robust and should be a pure win. In the meantime, we need to think about how to completely eliminate these issues, likely through the route as @rjmccall mentioned in D66230.
Differential Revision: https://reviews.llvm.org/D86859 --- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 121 ++++++++++++++---- .../Transforms/Coroutines/coro-param-copy.ll | 57 ++++++--- 2 files changed, 136 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index b2677b4572e47c..acb14b11aba9ef 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -625,7 +625,22 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, // We use a pointer use visitor to discover if there are any writes into an // alloca that dominates CoroBegin. If that is the case, insertSpills will copy // the value from the alloca into the coroutine frame spill slot corresponding -// to that alloca. +// to that alloca. We also collect any alias pointing to the alloca created +// before CoroBegin but used after CoroBegin. These alias will be recreated +// after CoroBegin from the frame address so that latter references are +// pointing to the frame instead of the stack. +// Note: We are repurposing PtrUseVisitor's isEscaped() to mean whether the +// pointer is potentially written into. +// TODO: If the pointer is really escaped, we are in big trouble because we +// will be escaping a pointer to a stack address that would no longer exist +// soon. However most escape analysis isn't good enough to precisely tell, +// so we are assuming that if a pointer is escaped that it's written into. +// TODO: Another potential issue is if we are creating an alias through +// a function call, e.g: +// %a = AllocaInst ... +// %b = call @computeAddress(... %a) +// If %b is an alias of %a and will be used after CoroBegin, this will be broken +// and there is nothing we can do about it. namespace { struct AllocaUseVisitor : PtrUseVisitor { using Base = PtrUseVisitor; @@ -633,49 +648,83 @@ struct AllocaUseVisitor : PtrUseVisitor { const CoroBeginInst &CB) : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {} - // We are only interested in uses that dominate coro.begin. + // We are only interested in uses that's not dominated by coro.begin. void visit(Instruction &I) { - if (DT.dominates(&I, &CoroBegin)) + if (!DT.dominates(&CoroBegin, &I)) Base::visit(I); } // We need to provide this overload as PtrUseVisitor uses a pointer based // visiting function. void visit(Instruction *I) { return visit(*I); } - void visitLoadInst(LoadInst &) {} // Good. Nothing to do. + // We cannot handle PHI node and SelectInst because they could be selecting + // between two addresses that point to different Allocas. + void visitPHINode(PHINode &I) { + assert(!usedAfterCoroBegin(I) && + "Unable to handle PHI node of aliases created before CoroBegin but " + "used after CoroBegin"); + } + + void visitSelectInst(SelectInst &I) { + assert(!usedAfterCoroBegin(I) && + "Unable to handle Select of aliases created before CoroBegin but " + "used after CoroBegin"); + } + + void visitLoadInst(LoadInst &) {} // If the use is an operand, the pointer escaped and anything can write into // that memory. If the use is the pointer, we are definitely writing into the // alloca and therefore we need to copy. - void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); } + void visitStoreInst(StoreInst &SI) { PI.setEscaped(&SI); } - // Any other instruction that is not filtered out by PtrUseVisitor, will - // result in the copy. - void visitInstruction(Instruction &I) { PI.setAborted(&I); } + // All mem intrinsics modify the data. 
+ void visitMemIntrinsic(MemIntrinsic &MI) { PI.setEscaped(&MI); } + + void visitBitCastInst(BitCastInst &BC) { + Base::visitBitCastInst(BC); + handleAlias(BC); + } + + void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { + Base::visitAddrSpaceCastInst(ASC); + handleAlias(ASC); + } + + void visitGetElementPtrInst(GetElementPtrInst &GEPI) { + // The base visitor will adjust Offset accordingly. + Base::visitGetElementPtrInst(GEPI); + handleAlias(GEPI); + } + + const SmallVector, 1> &getAliases() const { + return Aliases; + } private: const DominatorTree &DT; const CoroBeginInst &CoroBegin; + // All alias to the original AllocaInst, and are used after CoroBegin. + // Each entry contains the instruction and the offset in the original Alloca. + SmallVector, 1> Aliases{}; + + bool usedAfterCoroBegin(Instruction &I) { + for (auto &U : I.uses()) + if (DT.dominates(&CoroBegin, U)) + return true; + return false; + } + + void handleAlias(Instruction &I) { + if (!usedAfterCoroBegin(I)) + return; + + assert(IsOffsetKnown && "Can only handle alias with known offset created " + "before CoroBegin and used after"); + Aliases.emplace_back(&I, Offset); + } }; } // namespace -static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT, - const CoroBeginInst &CB) { - const DataLayout &DL = A.getModule()->getDataLayout(); - AllocaUseVisitor Visitor(DL, DT, CB); - auto PtrI = Visitor.visitPtr(A); - if (PtrI.isEscaped() || PtrI.isAborted()) { - auto *PointerEscapingInstr = PtrI.getEscapingInst() - ? PtrI.getEscapingInst() - : PtrI.getAbortingInst(); - if (PointerEscapingInstr) { - LLVM_DEBUG( - dbgs() << "AllocaInst copy was triggered by instruction: " - << *PointerEscapingInstr << "\n"); - } - return true; - } - return false; -} // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that @@ -955,7 +1004,11 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { for (auto &P : Allocas) { AllocaInst *const A = P.first; - if (mightWriteIntoAllocaPtr(*A, DT, *CB)) { + AllocaUseVisitor Visitor(A->getModule()->getDataLayout(), DT, *CB); + auto PtrI = Visitor.visitPtr(*A); + assert(!PtrI.isAborted()); + if (PtrI.isEscaped()) { + // isEscaped really means potentially modified before CoroBegin. if (A->isArrayAllocation()) report_fatal_error( "Coroutines cannot handle copying of array allocas yet"); @@ -964,6 +1017,20 @@ static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) { auto *Value = Builder.CreateLoad(A->getAllocatedType(), A); Builder.CreateStore(Value, G); } + // For each alias to Alloca created before CoroBegin but used after + // CoroBegin, we recreate them after CoroBegin by appplying the offset + // to the pointer in the frame. 
+ for (const auto &Alias : Visitor.getAliases()) { + auto *FramePtr = GetFramePointer(P.second, A); + auto *FramePtrRaw = + Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); + auto *AliasPtr = Builder.CreateGEP( + FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second)); + auto *AliasPtrTyped = + Builder.CreateBitCast(AliasPtr, Alias.first->getType()); + Alias.first->replaceUsesWithIf( + AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); + } } } return FramePtr; diff --git a/llvm/test/Transforms/Coroutines/coro-param-copy.ll b/llvm/test/Transforms/Coroutines/coro-param-copy.ll index 5967a05226fdb2..da08c4f15e156f 100644 --- a/llvm/test/Transforms/Coroutines/coro-param-copy.ll +++ b/llvm/test/Transforms/Coroutines/coro-param-copy.ll @@ -5,22 +5,37 @@ define i8* @f() "coroutine.presplit"="1" { entry: + %a.addr = alloca i64 ; read-only before coro.begin + %a = load i64, i64* %a.addr ; cannot modify the value, don't need to copy + %x.addr = alloca i64 - call void @use(i64* %x.addr) ; might write to %x + call void @use(i64* %x.addr) ; uses %x.addr before coro.begin + %y.addr = alloca i64 - %y = load i64, i64* %y.addr ; cannot modify the value, don't need to copy - call void @print(i64 %y) + %y.cast = bitcast i64* %y.addr to i8* ; alias created and used after coro.begin + + %z.addr = alloca i64 + %flag = call i1 @check() + br i1 %flag, label %flag_true, label %flag_merge + +flag_true: + call void @use(i64* %z.addr) ; conditionally used %z.addr + br label %flag_merge +flag_merge: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %size = call i32 @llvm.coro.size.i32() - %alloc = call i8* @myAlloc(i64 %y, i32 %size) + %alloc = call i8* @myAlloc(i32 %size) %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + call void @llvm.memset.p0i8.i32(i8* %y.cast, i8 1, i32 4, i1 false) %0 = call i8 @llvm.coro.suspend(token none, i1 false) switch i8 %0, label %suspend [i8 0, label %resume i8 1, label %cleanup] resume: + call void @use(i64* %a.addr) call void @use(i64* %x.addr) call void @use(i64* %y.addr) + call void @use(i64* %z.addr) br label %cleanup cleanup: @@ -33,26 +48,36 @@ suspend: } ; See that we added both x and y to the frame. -; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i1 } +; CHECK: %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i64, i64, i64, i1 } ; See that all of the uses prior to coro-begin stays put. ; CHECK-LABEL: define i8* @f() { ; CHECK-NEXT: entry: +; CHECK-NEXT: %a.addr = alloca i64 ; CHECK-NEXT: %x.addr = alloca i64 ; CHECK-NEXT: call void @use(i64* %x.addr) ; CHECK-NEXT: %y.addr = alloca i64 -; CHECK-NEXT: %y = load i64, i64* %y.addr -; CHECK-NEXT: call void @print(i64 %y) +; CHECK-NEXT: %z.addr = alloca i64 ; See that we only copy the x as y was not modified prior to coro.begin. -; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr -; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 2 -; CHECK-NEXT: %1 = load i64, i64* %x.addr -; CHECK-NEXT: store i64 %1, i64* %0 -; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 -; CHECK-NEXT: store i1 false, i1* %index.addr1 +; CHECK: store void (%f.Frame*)* @f.destroy, void (%f.Frame*)** %destroy.addr +; The next 3 instructions are to copy data in %x.addr from stack to frame. 
+; CHECK-NEXT: %0 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 3 +; CHECK-NEXT: %1 = load i64, i64* %x.addr, align 4 +; CHECK-NEXT: store i64 %1, i64* %0, align 4 +; The next 2 instructions are to recreate %y.cast in the original IR. +; CHECK-NEXT: %2 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 4 +; CHECK-NEXT: %3 = bitcast i64* %2 to i8* +; The next 3 instructions are to copy data in %z.addr from stack to frame. +; CHECK-NEXT: %4 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 5 +; CHECK-NEXT: %5 = load i64, i64* %z.addr, align 4 +; CHECK-NEXT: store i64 %5, i64* %4, align 4 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %3, i8 1, i32 4, i1 false) +; CHECK-NEXT: %index.addr1 = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0, i32 6 +; CHECK-NEXT: store i1 false, i1* %index.addr1, align 1 ; CHECK-NEXT: ret i8* %hdl + declare i8* @llvm.coro.free(token, i8*) declare i32 @llvm.coro.size.i32() declare i8 @llvm.coro.suspend(token, i1) @@ -64,7 +89,9 @@ declare i1 @llvm.coro.alloc(token) declare i8* @llvm.coro.begin(token, i8*) declare i1 @llvm.coro.end(i8*, i1) -declare noalias i8* @myAlloc(i64, i32) -declare void @print(i64) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1) + +declare noalias i8* @myAlloc(i32) declare void @use(i64*) declare void @free(i8*) +declare i1 @check() From e97f3b1b4327f9db0ca12cdd7157c304ad206802 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 5 Sep 2020 17:23:48 +0200 Subject: [PATCH 103/161] [InstCombine] Fold abs of known negative operand If we know that the abs operand is known negative, we can replace it with a neg. To avoid computing known bits twice, I've removed the fold for the non-negative case from InstSimplify. Both the non-negative and the negative case are handled by InstCombine now, with one known bits call. Differential Revision: https://reviews.llvm.org/D87196 --- llvm/lib/Analysis/InstructionSimplify.cpp | 3 --- .../InstCombine/InstCombineCalls.cpp | 19 +++++++++++++++---- .../Transforms/InstCombine/abs-intrinsic.ll | 7 +++---- .../Transforms/InstSimplify/abs_intrinsic.ll | 17 ++++++++++++----- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 7c13b41bc7e648..e59c0a84044aae 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5274,9 +5274,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, // on the outer abs. if (match(Op0, m_Intrinsic(m_Value(), m_Value()))) return Op0; - // If the sign bit is clear already, then abs does not do anything. 
- if (isKnownNonNegative(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return Op0; break; case Intrinsic::smax: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 40f6e9e147d768..11c2367d1608e9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -657,6 +657,19 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) { return nullptr; } +static Optional getKnownSign(Value *Op, Instruction *CxtI, + const DataLayout &DL, AssumptionCache *AC, + DominatorTree *DT) { + KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT); + if (Known.isNonNegative()) + return false; + if (Known.isNegative()) + return true; + + return isImpliedByDomCondition( + ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL); +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallBase to do the heavy /// lifting. @@ -791,11 +804,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X)))) return replaceOperand(*II, 0, X); - if (Optional Imp = isImpliedByDomCondition( - ICmpInst::ICMP_SGE, IIOperand, - Constant::getNullValue(IIOperand->getType()), II, DL)) { + if (Optional Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) { // abs(x) -> x if x >= 0 - if (*Imp) + if (!*Sign) return replaceInstUsesWith(*II, IIOperand); // abs(x) -> -x if x < 0 diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index b00681d44d26c9..b5a74f728ac396 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -233,7 +233,7 @@ define i32 @abs_assume_neg(i32 %x) { ; CHECK-LABEL: @abs_assume_neg( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NEXT: [[ABS:%.*]] = sub i32 0, [[X]] ; CHECK-NEXT: ret i32 [[ABS]] ; %cmp = icmp slt i32 %x, 0 @@ -245,9 +245,8 @@ define i32 @abs_assume_neg(i32 %x) { define i32 @abs_known_neg(i16 %x) { ; CHECK-LABEL: @abs_known_neg( ; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[NEG:%.*]] = xor i32 [[EXT]], -1 -; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[NEG]], i1 false) -; CHECK-NEXT: ret i32 [[ABS]] +; CHECK-NEXT: [[NEG_NEG:%.*]] = add nuw nsw i32 [[EXT]], 1 +; CHECK-NEXT: ret i32 [[NEG_NEG]] ; %ext = zext i16 %x to i32 %neg = sub nsw i32 -1, %ext diff --git a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll index 70b50da9f0415a..4598c5732e1213 100644 --- a/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll +++ b/llvm/test/Transforms/InstSimplify/abs_intrinsic.ll @@ -47,11 +47,14 @@ define i32 @test_abs_abs_3(i32 %x) { } ; If the sign bit is known zero, the abs is not needed. +; These cases are only folded by InstCombine, to avoid computing known bits +; twice, for the non-negative and the negative case. 
define i32 @zext_abs(i31 %x) { ; CHECK-LABEL: @zext_abs( ; CHECK-NEXT: [[ZEXT:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: ret i32 [[ZEXT]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[ZEXT]], i1 false) +; CHECK-NEXT: ret i32 [[ABS]] ; %zext = zext i31 %x to i32 %abs = call i32 @llvm.abs.i32(i32 %zext, i1 false) @@ -61,7 +64,8 @@ define i32 @zext_abs(i31 %x) { define <3 x i82> @lshr_abs(<3 x i82> %x) { ; CHECK-LABEL: @lshr_abs( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i82> [[X:%.*]], -; CHECK-NEXT: ret <3 x i82> [[LSHR]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[LSHR]], i1 true) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %lshr = lshr <3 x i82> %x, %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %lshr, i1 true) @@ -71,7 +75,8 @@ define <3 x i82> @lshr_abs(<3 x i82> %x) { define i32 @and_abs(i32 %x) { ; CHECK-LABEL: @and_abs( ; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 2147483644 -; CHECK-NEXT: ret i32 [[AND]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[AND]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %and = and i32 %x, 2147483644 %abs = call i32 @llvm.abs.i32(i32 %and, i1 true) @@ -81,7 +86,8 @@ define i32 @and_abs(i32 %x) { define <3 x i82> @select_abs(<3 x i1> %cond) { ; CHECK-LABEL: @select_abs( ; CHECK-NEXT: [[SEL:%.*]] = select <3 x i1> [[COND:%.*]], <3 x i82> zeroinitializer, <3 x i82> -; CHECK-NEXT: ret <3 x i82> [[SEL]] +; CHECK-NEXT: [[ABS:%.*]] = call <3 x i82> @llvm.abs.v3i82(<3 x i82> [[SEL]], i1 false) +; CHECK-NEXT: ret <3 x i82> [[ABS]] ; %sel = select <3 x i1> %cond, <3 x i82> zeroinitializer, <3 x i82> %abs = call <3 x i82> @llvm.abs.v3i82(<3 x i82> %sel, i1 false) @@ -94,7 +100,8 @@ define i32 @assume_abs(i32 %x) { ; CHECK-LABEL: @assume_abs( ; CHECK-NEXT: [[ASSUME:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[ASSUME]]) -; CHECK-NEXT: ret i32 [[X]] +; CHECK-NEXT: [[ABS:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 true) +; CHECK-NEXT: ret i32 [[ABS]] ; %assume = icmp sge i32 %x, 0 call void @llvm.assume(i1 %assume) From 6eef387ddd863db1afe044e208bbff4366d5dac2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:20:32 +0200 Subject: [PATCH 104/161] [InstCombine] Test comparison of abs with int min (NFC) --- .../Transforms/InstCombine/abs-intrinsic.ll | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index b5a74f728ac396..d63b0a21f217fd 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare i8 @llvm.abs.i8(i8, i1) declare i32 @llvm.abs.i32(i32, i1) declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) declare <3 x i82> @llvm.abs.v3i82(<3 x i82>, i1) @@ -253,3 +254,43 @@ define i32 @abs_known_neg(i16 %x) { %abs = call i32 @llvm.abs.i32(i32 %neg, i1 false) ret i32 %abs } + +define i1 @abs_eq_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_eq_int_min_poison( +; CHECK-NEXT: ret i1 false +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_poison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_poison( +; CHECK-NEXT: ret i1 true +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 true) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_eq_int_min_nopoison(i8 %x) { +; CHECK-LABEL: 
@abs_eq_int_min_nopoison( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp eq i8 %abs, -128 + ret i1 %cmp +} + +define i1 @abs_ne_int_min_nopoison(i8 %x) { +; CHECK-LABEL: @abs_ne_int_min_nopoison( +; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], -128 +; CHECK-NEXT: ret i1 [[CMP]] +; + %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) + %cmp = icmp ne i8 %abs, -128 + ret i1 %cmp +} From f6b87da0c73fcf7f8f051151ce62d2e07a466a8e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:23:03 +0200 Subject: [PATCH 105/161] [InstCombine] Fold comparison of abs with int min If the abs is poisoning, this is already folded to true/false. For non-poisoning abs, we can convert this to a comparison with the operand. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 5 +++-- llvm/test/Transforms/InstCombine/abs-intrinsic.ll | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 608017b6dca251..74e9525e8ed46e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3090,9 +3090,10 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( switch (II->getIntrinsicID()) { case Intrinsic::abs: // abs(A) == 0 -> A == 0 - if (C.isNullValue()) + // abs(A) == INT_MIN -> A == INT_MIN + if (C.isNullValue() || C.isMinSignedValue()) return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0), - Constant::getNullValue(Ty)); + ConstantInt::get(Ty, C)); break; case Intrinsic::bswap: diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll index d63b0a21f217fd..30e5a9ddab3c61 100644 --- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll @@ -275,8 +275,7 @@ define i1 @abs_ne_int_min_poison(i8 %x) { define i1 @abs_eq_int_min_nopoison(i8 %x) { ; CHECK-LABEL: @abs_eq_int_min_nopoison( -; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[ABS]], -128 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[X:%.*]], -128 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) @@ -286,8 +285,7 @@ define i1 @abs_eq_int_min_nopoison(i8 %x) { define i1 @abs_ne_int_min_nopoison(i8 %x) { ; CHECK-LABEL: @abs_ne_int_min_nopoison( -; CHECK-NEXT: [[ABS:%.*]] = call i8 @llvm.abs.i8(i8 [[X:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[ABS]], -128 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], -128 ; CHECK-NEXT: ret i1 [[CMP]] ; %abs = call i8 @llvm.abs.i8(i8 %x, i1 false) From d95ef009bd502a1c2c82952d4fa6fd1db836cef9 Mon Sep 17 00:00:00 2001 From: Azharuddin Mohammed Date: Tue, 8 Sep 2020 10:57:06 -0700 Subject: [PATCH 106/161] Update clang/test/Driver/darwin-infer-simulator-sdkroot.c - Fix it to work on Apple Silicon - Add testcases for simulators running on Apple Silicon --- .../Driver/darwin-infer-simulator-sdkroot.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/darwin-infer-simulator-sdkroot.c b/clang/test/Driver/darwin-infer-simulator-sdkroot.c index a084bf6346b621..7d4d4070b81a11 100644 --- 
a/clang/test/Driver/darwin-infer-simulator-sdkroot.c +++ b/clang/test/Driver/darwin-infer-simulator-sdkroot.c @@ -17,7 +17,7 @@ // // RUN: rm -rf %t/SDKs/iPhoneSimulator8.0.sdk // RUN: mkdir -p %t/SDKs/iPhoneSimulator8.0.sdk -// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator8.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-SIMULATOR %s // // CHECK-SIMULATOR: clang @@ -27,6 +27,18 @@ // CHECK-SIMULATOR: "-ios_simulator_version_min" "8.0.0" // // +// RUN: rm -rf %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/iPhoneSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/iPhoneSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-SIMULATOR-ARM64 %s +// +// CHECK-SIMULATOR-ARM64: clang +// CHECK-SIMULATOR-ARM64: "-cc1" +// CHECK-SIMULATOR-ARM64: -apple-ios14.0.0-simulator" +// CHECK-SIMULATOR-ARM64: ld +// CHECK-SIMULATOR-ARM64: "-ios_simulator_version_min" "14.0.0" +// +// // RUN: rm -rf %t/SDKs/WatchOS3.0.sdk // RUN: mkdir -p %t/SDKs/WatchOS3.0.sdk // RUN: env SDKROOT=%t/SDKs/WatchOS3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -43,7 +55,7 @@ // // RUN: rm -rf %t/SDKs/WatchSimulator3.0.sdk // RUN: mkdir -p %t/SDKs/WatchSimulator3.0.sdk -// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/WatchSimulator3.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR %s // // CHECK-WATCH-SIMULATOR: clang @@ -53,6 +65,18 @@ // CHECK-WATCH-SIMULATOR: "-watchos_simulator_version_min" "3.0.0" // // +// RUN: rm -rf %t/SDKs/WatchSimulator7.0.sdk +// RUN: mkdir -p %t/SDKs/WatchSimulator7.0.sdk +// RUN: env SDKROOT=%t/SDKs/WatchSimulator7.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-WATCH-SIMULATOR-ARM64 %s +// +// CHECK-WATCH-SIMULATOR-ARM64: clang +// CHECK-WATCH-SIMULATOR-ARM64: "-cc1" +// CHECK-WATCH-SIMULATOR-ARM64: -apple-watchos7.0.0-simulator" +// CHECK-WATCH-SIMULATOR-ARM64: ld +// CHECK-WATCH-SIMULATOR-ARM64: "-watchos_simulator_version_min" "7.0.0" +// +// // RUN: rm -rf %t/SDKs/AppleTVOS10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVOS10.0.sdk // RUN: env SDKROOT=%t/SDKs/AppleTVOS10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ @@ -67,7 +91,7 @@ // // RUN: rm -rf %t/SDKs/AppleTVSimulator10.0.sdk // RUN: mkdir -p %t/SDKs/AppleTVSimulator10.0.sdk -// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang %s -mlinker-version=400 -### 2>&1 \ +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator10.0.sdk %clang -arch x86_64 %s -mlinker-version=400 -### 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR %s // // CHECK-TV-SIMULATOR: clang @@ -75,3 +99,16 @@ // CHECK-TV-SIMULATOR: -apple-tvos10.0.0-simulator" // CHECK-TV-SIMULATOR: ld // CHECK-TV-SIMULATOR: "-tvos_simulator_version_min" "10.0.0" +// +// +// RUN: rm -rf %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: mkdir -p %t/SDKs/AppleTVSimulator14.0.sdk +// RUN: env SDKROOT=%t/SDKs/AppleTVSimulator14.0.sdk %clang -arch arm64 %s -mlinker-version=400 -### 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-TV-SIMULATOR-ARM64 %s +// +// CHECK-TV-SIMULATOR-ARM64: clang +// CHECK-TV-SIMULATOR-ARM64: "-cc1" +// CHECK-TV-SIMULATOR-ARM64: -apple-tvos14.0.0-simulator" +// CHECK-TV-SIMULATOR-ARM64: ld +// CHECK-TV-SIMULATOR-ARM64: "-tvos_simulator_version_min" "14.0.0" + 
From ce49b7d9ca01f4abbba1e5a00339d539b0ea563e Mon Sep 17 00:00:00 2001 From: Alexander Shaposhnikov Date: Tue, 8 Sep 2020 10:24:58 -0700 Subject: [PATCH 107/161] [llvm-install-name-tool] Add a test with multiple input files This diff adds a test which checks the error-message when multiple input files are passed to llvm-install-name-tool. Test plan: make check-all Differential revision: https://reviews.llvm.org/D87268 --- llvm/test/tools/llvm-objcopy/tool-help-message.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/tools/llvm-objcopy/tool-help-message.test b/llvm/test/tools/llvm-objcopy/tool-help-message.test index 1a0712b7a7ce5a..3f99d910ee97e6 100644 --- a/llvm/test/tools/llvm-objcopy/tool-help-message.test +++ b/llvm/test/tools/llvm-objcopy/tool-help-message.test @@ -18,6 +18,7 @@ # RUN: not llvm-install-name-tool -abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool --abcabc 2>&1 | FileCheck --check-prefix=UNKNOWN-ARG %s # RUN: not llvm-install-name-tool -add_rpath @executable 2>&1 | FileCheck %s --check-prefix=NO-INPUT-FILES +# RUN: not llvm-install-name-tool -add_rpath @executable f1 f2 2>&1 | FileCheck %s --check-prefix=MULTIPLE-INPUT-FILES # OBJCOPY-USAGE: USAGE: llvm-objcopy [options] input [output] # OBJCOPY-USAGE: Pass @FILE as argument to read options from FILE. @@ -30,3 +31,4 @@ # UNKNOWN-ARG: unknown argument '{{-+}}abcabc' # NO-INPUT-FILES: no input file specified +# MULTIPLE-INPUT-FILES: expects a single input file From 863aa0a37bd1a57b0720eda6d646f9abd51bf6c2 Mon Sep 17 00:00:00 2001 From: Andrew Ng Date: Mon, 7 Sep 2020 17:36:14 +0100 Subject: [PATCH 108/161] [LLD][ELF] Fix performance of MarkLive::scanEhFrameSection MarkLive::scanEhFrameSection is used to retain personality/LSDA functions when --gc-sections is enabled. Improve its performance by only iterating over the .eh_frame relocations that need to be resolved for an EhSectionPiece. This optimization makes the same assumption as elsewhere in LLD that the .eh_frame relocations are sorted by r_offset. This appears to be a performance regression introduced in commit e6c24299d237 (https://reviews.llvm.org/D59800). This change has been seen to reduce link time by up to ~50%. Differential Revision: https://reviews.llvm.org/D87245 --- lld/ELF/MarkLive.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 28e13e8c1234bf..af6c08c2158165 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -152,9 +152,9 @@ void MarkLive::scanEhFrameSection(EhInputSection &eh, // a LSDA. We only need to keep the LSDA alive, so ignore anything that // points to executable sections. uint64_t pieceEnd = piece.inputOff + piece.size; - for (size_t j = firstRelI, end2 = rels.size(); j < end2; ++j) - if (rels[j].r_offset < pieceEnd) - resolveReloc(eh, rels[j], true); + for (size_t j = firstRelI, end2 = rels.size(); + j < end2 && rels[j].r_offset < pieceEnd; ++j) + resolveReloc(eh, rels[j], true); } } From 17dce2fe43c9d3335d64936ece576b0e36d8fe31 Mon Sep 17 00:00:00 2001 From: David Stenberg Date: Tue, 8 Sep 2020 18:54:30 +0200 Subject: [PATCH 109/161] [UnifyFunctionExitNodes] Remove unused getters, NFC The get{Return,Unwind,Unreachable}Block functions in UnifyFunctionExitNodes have not been used for many years, so just remove them. 
Reviewed By: bjope Differential Revision: https://reviews.llvm.org/D87078 --- .../Transforms/Utils/UnifyFunctionExitNodes.h | 16 +------------ .../Utils/UnifyFunctionExitNodes.cpp | 24 +++++-------------- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h index ff70446e163d48..ce7cb16b3886d2 100644 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h @@ -7,10 +7,7 @@ //===----------------------------------------------------------------------===// // // This pass is used to ensure that functions have at most one return and one -// unwind instruction in them. Additionally, it keeps track of which node is -// the new exit node of the CFG. If there are no return or unwind instructions -// in the function, the getReturnBlock/getUnwindBlock methods will return a null -// pointer. +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -24,10 +21,6 @@ namespace llvm { class BasicBlock; struct UnifyFunctionExitNodes : public FunctionPass { - BasicBlock *ReturnBlock = nullptr; - BasicBlock *UnwindBlock = nullptr; - BasicBlock *UnreachableBlock; - public: static char ID; // Pass identification, replacement for typeid UnifyFunctionExitNodes(); @@ -35,13 +28,6 @@ struct UnifyFunctionExitNodes : public FunctionPass { // We can preserve non-critical-edgeness when we unify function exit nodes void getAnalysisUsage(AnalysisUsage &AU) const override; - // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent) - // return, unwind, or unreachable basic blocks in the CFG. - // - BasicBlock *getReturnBlock() const { return ReturnBlock; } - BasicBlock *getUnwindBlock() const { return UnwindBlock; } - BasicBlock *getUnreachableBlock() const { return UnreachableBlock; } - bool runOnFunction(Function &F) override; }; diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 9af39d9a0dd1c8..b124d0536254be 100644 --- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -6,10 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This pass is used to ensure that functions have at most one return -// instruction in them. Additionally, it keeps track of which node is the new -// exit node of the CFG. If there are no exit nodes in the CFG, the getExitNode -// method will return a null pointer. +// This pass is used to ensure that functions have at most one return and one +// unreachable instruction in them. // //===----------------------------------------------------------------------===// @@ -61,12 +59,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { UnreachableBlocks.push_back(&I); // Then unreachable blocks. - if (UnreachableBlocks.empty()) { - UnreachableBlock = nullptr; - } else if (UnreachableBlocks.size() == 1) { - UnreachableBlock = UnreachableBlocks.front(); - } else { - UnreachableBlock = BasicBlock::Create(F.getContext(), + if (UnreachableBlocks.size() > 1) { + BasicBlock *UnreachableBlock = BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); new UnreachableInst(F.getContext(), UnreachableBlock); @@ -76,14 +70,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { } } - // Now handle return blocks. 
- if (ReturningBlocks.empty()) { - ReturnBlock = nullptr; - return false; // No blocks return - } else if (ReturningBlocks.size() == 1) { - ReturnBlock = ReturningBlocks.front(); // Already has a single return block + // There is nothing more to do if we do not have multiple return blocks. + if (ReturningBlocks.size() <= 1) return false; - } // Otherwise, we need to insert a new basic block into the function, add a PHI // nodes (if the function returns values), and convert all of the return @@ -115,6 +104,5 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BB->getInstList().pop_back(); // Remove the return insn BranchInst::Create(NewRetBlock, BB); } - ReturnBlock = NewRetBlock; return true; } From 5b2b4f331d78f326e5e29166bec5ad92c864343d Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Tue, 1 Sep 2020 18:52:14 -0700 Subject: [PATCH 110/161] Retry of D84974 The test is being disabled on Linux, as lldb-vscode has a bug with --wait-for on LInux. I'm also fixing some compilation warnings. --- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +++- .../tools/lldb-vscode/runInTerminal/Makefile | 3 + .../runInTerminal/TestVSCode_runInTerminal.py | 48 +++++ .../tools/lldb-vscode/runInTerminal/main.c | 11 ++ lldb/tools/lldb-vscode/JSONUtils.cpp | 40 +++++ lldb/tools/lldb-vscode/JSONUtils.h | 12 ++ lldb/tools/lldb-vscode/VSCode.cpp | 70 +++++++- lldb/tools/lldb-vscode/VSCode.h | 45 +++++ lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++++-------- lldb/tools/lldb-vscode/package.json | 5 + 11 files changed, 363 insertions(+), 82 deletions(-) create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py create mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index fa5a9c0db1ebd8..5710751ec34bf3 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True): + sourceMap=None, disconnectAutomatically=True, runInTerminal=False): '''Sending launch request to vscode ''' @@ -316,10 +316,16 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap) + sourceMap=sourceMap, + runInTerminal=runInTerminal) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) + # We need to trigger a request_configurationDone after we've successfully + # attached a runInTerminal process to finish initialization. 
+ if runInTerminal: + self.vscode.request_configurationDone() + def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -327,7 +333,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None): + debuggerRoot=None, runInTerminal=False): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. ''' @@ -337,4 +343,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot) + terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 6b1c1c961b5452..834e33ef5c3da7 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,12 +300,29 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response = self.recv_packet(filter_type='response') - if response is None: + response_or_request = self.recv_packet(filter_type=['response', 'request']) + if response_or_request is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - self.validate_response(command, response) - return response + if response_or_request['type'] == 'response': + self.validate_response(command, response_or_request) + return response_or_request + else: + if response_or_request['command'] == 'runInTerminal': + subprocess.Popen(response_or_request['arguments']['args'], + env=response_or_request['arguments']['env']) + self.send_packet({ + "type": "response", + "seq": -1, + "request_seq": response_or_request['seq'], + "success": True, + "command": "runInTerminal", + "body": {} + }, set_sequence=False) + else: + desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) + raise ValueError(desc) + return None def wait_for_event(self, filter=None, timeout=None): @@ -599,7 +616,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None): + debuggerRoot=None, launchCommands=None, sourceMap=None, + runInTerminal=False): args_dict = { 'program': program } @@ -638,6 +656,8 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap + if runInTerminal: + args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile new file mode 100644 index 00000000000000..10495940055b63 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py new file mode 100644 
index 00000000000000..6a463dfacc1f99 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py @@ -0,0 +1,48 @@ +""" +Test lldb-vscode runInTerminal reverse request +""" + + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase +import time +import os + + +class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): + + mydir = TestBase.compute_mydir(__file__) + + @skipUnlessDarwin + @skipIfRemote + def test_runInTerminal(self): + ''' + Tests the "runInTerminal" reverse request. It makes sure that the IDE can + launch the inferior with the correct environment variables and arguments. + ''' + program = self.getBuildArtifact("a.out") + source = 'main.c' + self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) + breakpoint_line = line_number(source, '// breakpoint') + + self.set_source_breakpoints(source, [breakpoint_line]) + self.continue_to_next_stop() + + # We verify we actually stopped inside the loop + counter = int(self.vscode.get_local_variable_value('counter')) + self.assertTrue(counter > 0) + + # We verify we were able to set the launch arguments + argc = int(self.vscode.get_local_variable_value('argc')) + self.assertEqual(argc, 2) + + argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] + self.assertIn('foobar', argv1) + + # We verify we were able to set the environment + env = self.vscode.request_evaluate('foo')['body']['result'] + self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c new file mode 100644 index 00000000000000..676bd830e657b4 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c @@ -0,0 +1,11 @@ +#include +#include +#include + +int main(int argc, char *argv[]) { + const char *foo = getenv("FOO"); + for (int counter = 1;; counter++) { + sleep(1); // breakpoint + } + return 0; +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 36156ca2c42f94..044bfd13ec4635 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,4 +998,44 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } +/// See +/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal +llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { + llvm::json::Object reverse_request; + reverse_request.try_emplace("type", "request"); + reverse_request.try_emplace("command", "runInTerminal"); + + llvm::json::Object run_in_terminal_args; + // This indicates the IDE to open an embedded terminal, instead of opening the + // terminal in a new window. 
+ run_in_terminal_args.try_emplace("kind", "integrated"); + + auto launch_request_arguments = launch_request.getObject("arguments"); + std::vector args = GetStrings(launch_request_arguments, "args"); + // The program path must be the first entry in the "args" field + args.insert(args.begin(), + GetString(launch_request_arguments, "program").str()); + run_in_terminal_args.try_emplace("args", args); + + const auto cwd = GetString(launch_request_arguments, "cwd"); + if (!cwd.empty()) + run_in_terminal_args.try_emplace("cwd", cwd); + + // We need to convert the input list of environments variables into a + // dictionary + std::vector envs = GetStrings(launch_request_arguments, "env"); + llvm::json::Object environment; + for (const std::string &env : envs) { + size_t index = env.find("="); + environment.try_emplace(env.substr(0, index), env.substr(index + 1)); + } + run_in_terminal_args.try_emplace("env", + llvm::json::Value(std::move(environment))); + + reverse_request.try_emplace( + "arguments", llvm::json::Value(std::move(run_in_terminal_args))); + return reverse_request; +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index df4428f390ba2b..88cbef9e5fdd4d 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -443,6 +443,18 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); +/// Create a runInTerminal reverse request object +/// +/// \param[in] launch_request +/// The original launch_request object whose fields are used to construct +/// the reverse request object. +/// +/// \return +/// A "runInTerminal" JSON object that follows the specification outlined by +/// Microsoft. 
+llvm::json::Object +CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); + } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index 537cae7868631e..d57330ce6ff1ae 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,7 +38,8 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false) { + stop_at_entry(false), is_attach(false), + reverse_request_seq(0), waiting_for_run_in_terminal(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -362,4 +363,71 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } +PacketStatus VSCode::GetObject(llvm::json::Object &object) { + std::string json = ReadJSON(); + if (json.empty()) + return PacketStatus::EndOfFile; + + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + *log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return PacketStatus::JSONMalformed; + } + object = *json_value->getAsObject(); + if (!json_value->getAsObject()) { + if (log) + *log << "error: json packet isn't a object" << std::endl; + return PacketStatus::JSONNotObject; + } + return PacketStatus::Success; +} + +bool VSCode::HandleObject(const llvm::json::Object &object) { + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if (handler_pos != request_handlers.end()) { + handler_pos->second(object); + return true; // Success + } else { + if (log) + *log << "error: unhandled command \"" << command.data() << std::endl; + return false; // Fail + } + } + return false; +} + +PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response) { + request.try_emplace("seq", ++reverse_request_seq); + SendJSON(llvm::json::Value(std::move(request))); + while (true) { + PacketStatus status = GetObject(response); + const auto packet_type = GetString(response, "type"); + if (packet_type == "response") + return status; + else { + // Not our response, we got another packet + HandleObject(response); + } + } + return PacketStatus::EndOfFile; +} + +void VSCode::RegisterRequestCallback(std::string request, + RequestCallback callback) { + request_handlers[request] = callback; +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 88a0c08de24547..4a20c56c53eb0e 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,6 +9,7 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H +#include #include #include #include @@ -19,6 +20,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -65,6 +67,15 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry }; enum 
VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; +typedef void (*RequestCallback)(const llvm::json::Object &command); + +enum class PacketStatus { + Success = 0, + EndOfFile, + JSONMalformed, + JSONNotObject +}; + struct VSCode { InputStream input; OutputStream output; @@ -91,6 +102,10 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; + uint32_t reverse_request_seq; + std::map request_handlers; + std::condition_variable request_in_terminal_cv; + bool waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -152,6 +167,36 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); + + const std::map &GetRequestHandlers(); + + PacketStatus GetObject(llvm::json::Object &object); + bool HandleObject(const llvm::json::Object &object); + + /// Send a Debug Adapter Protocol reverse request to the IDE + /// + /// \param[in] request + /// The payload of the request to send. + /// + /// \param[out] response + /// The response of the IDE. It might be undefined if there was an error. + /// + /// \return + /// A \a PacketStatus object indicating the sucess or failure of the + /// request. + PacketStatus SendReverseRequest(llvm::json::Object request, + llvm::json::Object &response); + + /// Registers a callback handler for a Debug Adapter Protocol request + /// + /// \param[in] request + /// The name of the request following the Debug Adapter Protocol + /// specification. + /// + /// \param[in] callback + /// The callback to execute when the given request is triggered by the + /// IDE. + void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 54f2e653d06970..ee01822ba62170 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,7 +384,12 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: + case lldb::eStateStopped: { + if (g_vsc.waiting_for_run_in_terminal) { + g_vsc.waiting_for_run_in_terminal = false; + g_vsc.request_in_terminal_cv.notify_one(); + } + } // Only report a stopped event if the process was not restarted. if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1374,6 +1379,9 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); + // The debug adapter supports launching a debugee in intergrated VSCode + // terminal. + body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1433,6 +1441,49 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } +void request_runInTerminal(const llvm::json::Object &launch_request, + llvm::json::Object &launch_response) { + // We have already created a target that has a valid "program" path to the + // executable. We will attach to the next process whose name matches that + // of the target's. 
+ g_vsc.is_attach = true; + lldb::SBAttachInfo attach_info; + lldb::SBError error; + attach_info.SetWaitForLaunch(true, /*async*/ true); + g_vsc.target.Attach(attach_info, error); + + llvm::json::Object reverse_request = + CreateRunInTerminalReverseRequest(launch_request); + llvm::json::Object reverse_response; + lldb_vscode::PacketStatus status = + g_vsc.SendReverseRequest(reverse_request, reverse_response); + if (status != lldb_vscode::PacketStatus::Success) + error.SetErrorString("Process cannot be launched by IDE."); + + if (error.Success()) { + // Wait for the attach stop event to happen or for a timeout. + g_vsc.waiting_for_run_in_terminal = true; + static std::mutex mutex; + std::unique_lock locker(mutex); + g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); + + auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); + if (attached_pid == LLDB_INVALID_PROCESS_ID) + error.SetErrorString("Failed to attach to a process"); + else + SendProcessEvent(Attach); + } + + if (error.Fail()) { + launch_response["success"] = llvm::json::Value(false); + EmplaceSafeString(launch_response, "message", + std::string(error.GetCString())); + } else { + launch_response["success"] = llvm::json::Value(true); + g_vsc.SendJSON(CreateEventObject("initialized")); + } +} + // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1505,6 +1556,12 @@ void request_launch(const llvm::json::Object &request) { return; } + if (GetBoolean(arguments, "runInTerminal", false)) { + request_runInTerminal(request, response); + g_vsc.SendJSON(llvm::json::Value(std::move(response))); + return; + } + // Instantiate a launch info instance for the target. auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2831,39 +2888,35 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -const std::map &GetRequestHandlers() { -#define REQUEST_CALLBACK(name) \ - { #name, request_##name } - static std::map g_request_handlers = { - // VSCode Debug Adaptor requests - REQUEST_CALLBACK(attach), - REQUEST_CALLBACK(completions), - REQUEST_CALLBACK(continue), - REQUEST_CALLBACK(configurationDone), - REQUEST_CALLBACK(disconnect), - REQUEST_CALLBACK(evaluate), - REQUEST_CALLBACK(exceptionInfo), - REQUEST_CALLBACK(getCompileUnits), - REQUEST_CALLBACK(initialize), - REQUEST_CALLBACK(launch), - REQUEST_CALLBACK(next), - REQUEST_CALLBACK(pause), - REQUEST_CALLBACK(scopes), - REQUEST_CALLBACK(setBreakpoints), - REQUEST_CALLBACK(setExceptionBreakpoints), - REQUEST_CALLBACK(setFunctionBreakpoints), - REQUEST_CALLBACK(setVariable), - REQUEST_CALLBACK(source), - REQUEST_CALLBACK(stackTrace), - REQUEST_CALLBACK(stepIn), - REQUEST_CALLBACK(stepOut), - REQUEST_CALLBACK(threads), - REQUEST_CALLBACK(variables), - // Testing requests - REQUEST_CALLBACK(_testGetTargetBreakpoints), - }; -#undef REQUEST_CALLBACK - return g_request_handlers; +void RegisterRequestCallbacks() { + g_vsc.RegisterRequestCallback("attach", request_attach); + g_vsc.RegisterRequestCallback("completions", request_completions); + g_vsc.RegisterRequestCallback("continue", request_continue); + g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); + g_vsc.RegisterRequestCallback("disconnect", request_disconnect); + g_vsc.RegisterRequestCallback("evaluate", request_evaluate); + g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); + g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); + 
g_vsc.RegisterRequestCallback("initialize", request_initialize); + g_vsc.RegisterRequestCallback("launch", request_launch); + g_vsc.RegisterRequestCallback("next", request_next); + g_vsc.RegisterRequestCallback("pause", request_pause); + g_vsc.RegisterRequestCallback("scopes", request_scopes); + g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); + g_vsc.RegisterRequestCallback("setExceptionBreakpoints", + request_setExceptionBreakpoints); + g_vsc.RegisterRequestCallback("setFunctionBreakpoints", + request_setFunctionBreakpoints); + g_vsc.RegisterRequestCallback("setVariable", request_setVariable); + g_vsc.RegisterRequestCallback("source", request_source); + g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); + g_vsc.RegisterRequestCallback("stepIn", request_stepIn); + g_vsc.RegisterRequestCallback("stepOut", request_stepOut); + g_vsc.RegisterRequestCallback("threads", request_threads); + g_vsc.RegisterRequestCallback("variables", request_variables); + // Testing requests + g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", + request__testGetTargetBreakpoints); } } // anonymous namespace @@ -2895,6 +2948,8 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. lldb::SBDebugger::Initialize(); + RegisterRequestCallbacks(); + int portno = -1; LLDBVSCodeOptTable T; @@ -2937,49 +2992,17 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } - auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - std::string json = g_vsc.ReadJSON(); - if (json.empty()) + llvm::json::Object object; + lldb_vscode::PacketStatus status = g_vsc.GetObject(object); + if (status == lldb_vscode::PacketStatus::EndOfFile) break; + if (status != lldb_vscode::PacketStatus::Success) + return 1; // Fatal error - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (g_vsc.log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - - *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return 1; - } - - auto object = json_value->getAsObject(); - if (!object) { - if (g_vsc.log) - *g_vsc.log << "error: json packet isn't a object" << std::endl; + if (!g_vsc.HandleObject(object)) return 1; - } - - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(*object); - } else { - if (g_vsc.log) - *g_vsc.log << "error: unhandled command \"" << command.data() - << std::endl; - return 1; - } - } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 29ca06dd17d636..9077ab51dd7fab 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,6 +175,11 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] + }, + "runInTerminal": { + "type": "boolean", + "description": "Launch the program inside an integrated terminal in the IDE. 
Useful for debugging interactive command line programs", + "default": false } } }, From 8927c900697adf313fb5f11a09a03f1451439403 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 20:57:40 +0200 Subject: [PATCH 111/161] [InstCombine] Add tests for known bits for min/max intrinsics (NFC) We already have test coverage for the underlying calculation, this just checked that the folding is wired up... --- .../InstCombine/minmax-intrinsics.ll | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/minmax-intrinsics.ll diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll new file mode 100644 index 00000000000000..d808d5fc42445b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +declare i8 @llvm.umin.i8(i8, i8) +declare i8 @llvm.umax.i8(i8, i8) +declare i8 @llvm.smin.i8(i8, i8) +declare i8 @llvm.smax.i8(i8, i8) + +define i8 @umin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umin_known_bits( +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @umax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_known_bits( +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smin_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smin_known_bits( +; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = or i8 %x, -128 + %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} + +define i8 @smax_known_bits(i8 %x, i8 %y) { +; CHECK-LABEL: @smax_known_bits( +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 +; CHECK-NEXT: ret i8 [[R]] +; + %x2 = and i8 %x, 127 + %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y) + %r = and i8 %m, -128 + ret i8 %r +} From 8453fbf0889e22cf9bbb74c65e36cf8abbcec7b4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 8 Sep 2020 21:06:59 +0200 Subject: [PATCH 112/161] [ValueTracking] Compute known bits of min/max intrinsics Implement known bits for the min/max intrinsics based on the recently added KnownBits primitives. 
--- llvm/lib/Analysis/ValueTracking.cpp | 20 +++++++++++++++++++ .../InstCombine/minmax-intrinsics.ll | 20 ++++--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 6e5a7195bb1943..5eb66e96e1d858 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1739,6 +1739,26 @@ static void computeKnownBitsFromOperator(const Operator *I, } break; } + case Intrinsic::umin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umin(Known, Known2); + break; + case Intrinsic::umax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::umax(Known, Known2); + break; + case Intrinsic::smin: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smin(Known, Known2); + break; + case Intrinsic::smax: + computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); + computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q); + Known = KnownBits::smax(Known, Known2); + break; case Intrinsic::x86_sse42_crc32_64_64: Known.Zero.setBitsFrom(32); break; diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index d808d5fc42445b..797f85d9444747 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -8,10 +8,7 @@ declare i8 @llvm.smax.i8(i8, i8) define i8 @umin_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_known_bits( -; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umin.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %x2 = and i8 %x, 127 %m = call i8 @llvm.umin.i8(i8 %x2, i8 %y) @@ -21,10 +18,7 @@ define i8 @umin_known_bits(i8 %x, i8 %y) { define i8 @umax_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @umax_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.umax.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -128 ; %x2 = or i8 %x, -128 %m = call i8 @llvm.umax.i8(i8 %x2, i8 %y) @@ -34,10 +28,7 @@ define i8 @umax_known_bits(i8 %x, i8 %y) { define i8 @smin_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @smin_known_bits( -; CHECK-NEXT: [[X2:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 -128 ; %x2 = or i8 %x, -128 %m = call i8 @llvm.smin.i8(i8 %x2, i8 %y) @@ -47,10 +38,7 @@ define i8 @smin_known_bits(i8 %x, i8 %y) { define i8 @smax_known_bits(i8 %x, i8 %y) { ; CHECK-LABEL: @smax_known_bits( -; CHECK-NEXT: [[X2:%.*]] = and i8 [[X:%.*]], 127 -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smax.i8(i8 [[X2]], i8 [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[M]], -128 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %x2 = and i8 %x, 127 %m = call i8 @llvm.smax.i8(i8 %x2, i8 %y) From 66310aafa0da47dd4664a1200afc7e22cab15b65 Mon Sep 17 00:00:00 2001 From: "Paul C. 
Anagnostopoulos" Date: Sun, 30 Aug 2020 14:00:25 -0400 Subject: [PATCH 113/161] fix typos; improve a couple of descriptions; add release note --- llvm/docs/ReleaseNotes.rst | 7 +++++-- llvm/docs/TableGen/ProgRef.rst | 35 ++++++++++++++++++---------------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 59897806c37a5b..47ce9fa10d908a 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -69,10 +69,13 @@ Changes to building LLVM Changes to TableGen ------------------- +* The new "TableGen Programmer's Reference" replaces the "TableGen Language + Introduction" and "TableGen Language Reference" documents. + * The syntax for specifying an integer range in a range list has changed. The old syntax used a hyphen in the range (e.g., ``{0-9}``). The new syntax - uses the "`...`" range punctuator (e.g., ``{0...9}``). The hyphen syntax - is deprecated. The "TableGen Language Reference" document has been updated. + uses the "`...`" range punctuation (e.g., ``{0...9}``). The hyphen syntax + is deprecated. Changes to the ARM Backend -------------------------- diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 83684ab41c2802..07f0ba8a54dd05 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -140,7 +140,7 @@ the file is printed for review. The following are the basic punctuation tokens:: - - + [ ] { } ( ) < > : ; . = ? # + - + [ ] { } ( ) < > : ; . ... = ? # Literals -------- @@ -328,8 +328,8 @@ to an entity of type ``bits<4>``. .. warning:: The peculiar last form of :token:`RangePiece` is due to the fact that the "``-``" is included in the :token:`TokInteger`, hence ``1-5`` gets lexed as - two consecutive tokens, with values ``1`` and ``-5``, - instead of "1", "-", and "5". + two consecutive tokens, with values ``1`` and ``-5``, instead of "1", "-", + and "5". The use of hyphen as the range punctuation is deprecated. Simple values ------------- @@ -431,7 +431,7 @@ sense after reading the remainder of this guide. * The iteration variable of a ``foreach``, such as the use of ``i`` in:: - foreach i = 0..5 in + foreach i = 0...5 in def Foo#i; .. productionlist:: @@ -466,11 +466,11 @@ primary value. Here are the possible suffixes for some primary *value*. *value*\ ``{17}`` The final value is bit 17 of the integer *value* (note the braces). -*value*\ ``{8..15}`` +*value*\ ``{8...15}`` The final value is bits 8--15 of the integer *value*. The order of the - bits can be reversed by specifying ``{15..8}``. + bits can be reversed by specifying ``{15...8}``. -*value*\ ``[4..7,17,2..3,4]`` +*value*\ ``[4...7,17,2...3,4]`` The final value is a new list that is a slice of the list *value* (note the brackets). The new list contains elements 4, 5, 6, 7, 17, 2, 3, and 4. Elements may be @@ -827,10 +827,13 @@ template that expands into multiple records. MultiClassID: `TokIdentifier` As with regular classes, the multiclass has a name and can accept template -arguments. The body of the multiclass contains a series of statements that -define records, using :token:`Def` and :token:`Defm`. In addition, -:token:`Defvar`, :token:`Foreach`, and :token:`Let` -statements can be used to factor out even more common elements. +arguments. A multiclass can inherit from other multiclasses, which causes +the other multiclasses to be expanded and contribute to the record +definitions in the inheriting multiclass. 
The body of the multiclass +contains a series of statements that define records, using :token:`Def` and +:token:`Defm`. In addition, :token:`Defvar`, :token:`Foreach`, and +:token:`Let` statements can be used to factor out even more common elements. +The :token:`If` statement can also be used. Also as with regular classes, the multiclass has the implicit template argument ``NAME`` (see NAME_). When a named (non-anonymous) record is @@ -1128,8 +1131,8 @@ the next iteration. The following ``defvar`` will not work:: Variables can also be defined with ``defvar`` in a record body. See `Defvar in Record Body`_ for more details. -``foreach`` --- iterate over a sequence ---------------------------------------- +``foreach`` --- iterate over a sequence of statements +----------------------------------------------------- The ``foreach`` statement iterates over a series of statements, varying a variable over a sequence of values. @@ -1529,7 +1532,7 @@ and non-0 as true. ``!shl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* left logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!size(``\ *a*\ ``)`` This operator produces the number of elements in the list *a*. @@ -1537,12 +1540,12 @@ and non-0 as true. ``!sra(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right arithmetically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!srl(``\ *a*\ ``,`` *count*\ ``)`` This operator shifts *a* right logically by *count* bits and produces the resulting value. The operation is performed on a 64-bit integer; the result - is undefined for shift counts outside 0..63. + is undefined for shift counts outside 0...63. ``!strconcat(``\ *str1*\ ``,`` *str2*\ ``, ...)`` This operator concatenates the string arguments *str1*, *str2*, etc., and From f4ac79a364f2de7270a3238b176e17b40b036305 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 8 Sep 2020 20:06:07 +0000 Subject: [PATCH 114/161] Sema: extract a check for `isCFError` (NFC) Extract a simple check to check if a `RecordDecl` is a `CFError` Decl. This is a simple refactoring to prepare for an upcoming change. NFC. Patch is extracted from https://github.com/llvm/llvm-project-staging/commit/8afaf3aad2af43cfedca7a24cd817848c4e95c0c. --- clang/include/clang/Sema/Sema.h | 1 + clang/lib/Sema/SemaType.cpp | 52 +++++++++++++++++---------------- 2 files changed, 28 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 53d0285d370276..129ac0355c87f5 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12415,6 +12415,7 @@ class Sema final { /// The struct behind the CFErrorRef pointer. RecordDecl *CFError = nullptr; + bool isCFError(RecordDecl *D); /// Retrieve the identifier "NSError". 
IdentifierInfo *getNSErrorIdent(); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 03442fb03b3aa2..d8ea9c03725920 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -4043,32 +4043,9 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator, if (auto recordType = type->getAs()) { RecordDecl *recordDecl = recordType->getDecl(); - bool isCFError = false; - if (S.CFError) { - // If we already know about CFError, test it directly. - isCFError = (S.CFError == recordDecl); - } else { - // Check whether this is CFError, which we identify based on its bridge - // to NSError. CFErrorRef used to be declared with "objc_bridge" but is - // now declared with "objc_bridge_mutable", so look for either one of - // the two attributes. - if (recordDecl->getTagKind() == TTK_Struct && numNormalPointers > 0) { - IdentifierInfo *bridgedType = nullptr; - if (auto bridgeAttr = recordDecl->getAttr()) - bridgedType = bridgeAttr->getBridgedType(); - else if (auto bridgeAttr = - recordDecl->getAttr()) - bridgedType = bridgeAttr->getBridgedType(); - - if (bridgedType == S.getNSErrorIdent()) { - S.CFError = recordDecl; - isCFError = true; - } - } - } - // If this is CFErrorRef*, report it as such. - if (isCFError && numNormalPointers == 2 && numTypeSpecifierPointers < 2) { + if (numNormalPointers == 2 && numTypeSpecifierPointers < 2 && + S.isCFError(recordDecl)) { return PointerDeclaratorKind::CFErrorRefPointer; } break; @@ -4092,6 +4069,31 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator, } } +bool Sema::isCFError(RecordDecl *RD) { + // If we already know about CFError, test it directly. + if (CFError) + return CFError == RD; + + // Check whether this is CFError, which we identify based on its bridge to + // NSError. CFErrorRef used to be declared with "objc_bridge" but is now + // declared with "objc_bridge_mutable", so look for either one of the two + // attributes. 
+ if (RD->getTagKind() == TTK_Struct) { + IdentifierInfo *bridgedType = nullptr; + if (auto bridgeAttr = RD->getAttr()) + bridgedType = bridgeAttr->getBridgedType(); + else if (auto bridgeAttr = RD->getAttr()) + bridgedType = bridgeAttr->getBridgedType(); + + if (bridgedType == getNSErrorIdent()) { + CFError = RD; + return true; + } + } + + return false; +} + static FileID getNullabilityCompletenessCheckFileID(Sema &S, SourceLocation loc) { // If we're anywhere in a function, method, or closure context, don't perform From 041da0d828e39d849c99adf1391aaa9291f4310f Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Tue, 8 Sep 2020 16:01:30 -0400 Subject: [PATCH 115/161] [HIP] Add gfx1031 and gfx1030 Differential Revision: https://reviews.llvm.org/D87324 --- clang/lib/Basic/Cuda.cpp | 2 +- clang/test/Driver/hip-offload-arch.hip | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/hip-offload-arch.hip diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 709185707bd9c5..2abbe3e81e0a2e 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -84,7 +84,7 @@ CudaArchToStringMap arch_names[] = { GFX(810), // stoney GFX(900), // vega, instinct GFX(902), GFX(904), GFX(906), GFX(908), GFX(909), - GFX(1010), GFX(1011), GFX(1012), + GFX(1010), GFX(1011), GFX(1012), GFX(1030), GFX(1031) // clang-format on }; #undef SM diff --git a/clang/test/Driver/hip-offload-arch.hip b/clang/test/Driver/hip-offload-arch.hip new file mode 100644 index 00000000000000..4cd37b5815f737 --- /dev/null +++ b/clang/test/Driver/hip-offload-arch.hip @@ -0,0 +1,10 @@ +// REQUIRES: clang-driver, x86-registered-target, amdgpu-registered-target + +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --offload-arch=gfx1030 \ +// RUN: --offload-arch=gfx1031 \ +// RUN: -nogpuinc -nogpulib \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1030"}} +// CHECK: {{"[^"]*clang[^"]*".* "-target-cpu" "gfx1031"}} From 5c463d107d3c26fc5573f31b838a8a3a1e4b5065 Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Tue, 8 Sep 2020 13:40:42 -0700 Subject: [PATCH 116/161] Revert "Retry of D84974" This reverts commit 5b2b4f331d78f326e5e29166bec5ad92c864343d. 
This caused a link error in http://lab.llvm.org:8011/builders/lldb-x64-windows-ninja/builds/18794/steps/build/logs/stdio --- .../tools/lldb-vscode/lldbvscode_testcase.py | 14 +- .../test/tools/lldb-vscode/vscode.py | 30 +--- .../tools/lldb-vscode/runInTerminal/Makefile | 3 - .../runInTerminal/TestVSCode_runInTerminal.py | 48 ----- .../tools/lldb-vscode/runInTerminal/main.c | 11 -- lldb/tools/lldb-vscode/JSONUtils.cpp | 40 ----- lldb/tools/lldb-vscode/JSONUtils.h | 12 -- lldb/tools/lldb-vscode/VSCode.cpp | 70 +------- lldb/tools/lldb-vscode/VSCode.h | 45 ----- lldb/tools/lldb-vscode/lldb-vscode.cpp | 167 ++++++++---------- lldb/tools/lldb-vscode/package.json | 5 - 11 files changed, 82 insertions(+), 363 deletions(-) delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py delete mode 100644 lldb/test/API/tools/lldb-vscode/runInTerminal/main.c diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 5710751ec34bf3..fa5a9c0db1ebd8 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -282,7 +282,7 @@ def launch(self, program=None, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, debuggerRoot=None, launchCommands=None, - sourceMap=None, disconnectAutomatically=True, runInTerminal=False): + sourceMap=None, disconnectAutomatically=True): '''Sending launch request to vscode ''' @@ -316,16 +316,10 @@ def cleanup(): sourcePath=sourcePath, debuggerRoot=debuggerRoot, launchCommands=launchCommands, - sourceMap=sourceMap, - runInTerminal=runInTerminal) + sourceMap=sourceMap) if not (response and response['success']): self.assertTrue(response['success'], 'launch failed (%s)' % (response['message'])) - # We need to trigger a request_configurationDone after we've successfully - # attached a runInTerminal process to finish initialization. - if runInTerminal: - self.vscode.request_configurationDone() - def build_and_launch(self, program, args=None, cwd=None, env=None, stopOnEntry=False, disableASLR=True, @@ -333,7 +327,7 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None, sourcePath=None, - debuggerRoot=None, runInTerminal=False): + debuggerRoot=None): '''Build the default Makefile target, create the VSCode debug adaptor, and launch the process. 
''' @@ -343,4 +337,4 @@ def build_and_launch(self, program, args=None, cwd=None, env=None, self.launch(program, args, cwd, env, stopOnEntry, disableASLR, disableSTDIO, shellExpandArguments, trace, initCommands, preRunCommands, stopCommands, exitCommands, - terminateCommands, sourcePath, debuggerRoot, runInTerminal=runInTerminal) + terminateCommands, sourcePath, debuggerRoot) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 834e33ef5c3da7..6b1c1c961b5452 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -300,29 +300,12 @@ def send_recv(self, command): self.send_packet(command) done = False while not done: - response_or_request = self.recv_packet(filter_type=['response', 'request']) - if response_or_request is None: + response = self.recv_packet(filter_type='response') + if response is None: desc = 'no response for "%s"' % (command['command']) raise ValueError(desc) - if response_or_request['type'] == 'response': - self.validate_response(command, response_or_request) - return response_or_request - else: - if response_or_request['command'] == 'runInTerminal': - subprocess.Popen(response_or_request['arguments']['args'], - env=response_or_request['arguments']['env']) - self.send_packet({ - "type": "response", - "seq": -1, - "request_seq": response_or_request['seq'], - "success": True, - "command": "runInTerminal", - "body": {} - }, set_sequence=False) - else: - desc = 'unkonwn reverse request "%s"' % (response_or_request['command']) - raise ValueError(desc) - + self.validate_response(command, response) + return response return None def wait_for_event(self, filter=None, timeout=None): @@ -616,8 +599,7 @@ def request_launch(self, program, args=None, cwd=None, env=None, trace=False, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, terminateCommands=None ,sourcePath=None, - debuggerRoot=None, launchCommands=None, sourceMap=None, - runInTerminal=False): + debuggerRoot=None, launchCommands=None, sourceMap=None): args_dict = { 'program': program } @@ -656,8 +638,6 @@ def request_launch(self, program, args=None, cwd=None, env=None, args_dict['launchCommands'] = launchCommands if sourceMap: args_dict['sourceMap'] = sourceMap - if runInTerminal: - args_dict['runInTerminal'] = runInTerminal command_dict = { 'command': 'launch', 'type': 'request', diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile b/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile deleted file mode 100644 index 10495940055b63..00000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -C_SOURCES := main.c - -include Makefile.rules diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py b/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py deleted file mode 100644 index 6a463dfacc1f99..00000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/TestVSCode_runInTerminal.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Test lldb-vscode runInTerminal reverse request -""" - - -import unittest2 -import vscode -from lldbsuite.test.decorators import * -from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil -import lldbvscode_testcase -import time -import os - - -class TestVSCode_runInTerminal(lldbvscode_testcase.VSCodeTestCaseBase): - - mydir = TestBase.compute_mydir(__file__) - - 
@skipUnlessDarwin - @skipIfRemote - def test_runInTerminal(self): - ''' - Tests the "runInTerminal" reverse request. It makes sure that the IDE can - launch the inferior with the correct environment variables and arguments. - ''' - program = self.getBuildArtifact("a.out") - source = 'main.c' - self.build_and_launch(program, stopOnEntry=True, runInTerminal=True, args=["foobar"], env=["FOO=bar"]) - breakpoint_line = line_number(source, '// breakpoint') - - self.set_source_breakpoints(source, [breakpoint_line]) - self.continue_to_next_stop() - - # We verify we actually stopped inside the loop - counter = int(self.vscode.get_local_variable_value('counter')) - self.assertTrue(counter > 0) - - # We verify we were able to set the launch arguments - argc = int(self.vscode.get_local_variable_value('argc')) - self.assertEqual(argc, 2) - - argv1 = self.vscode.request_evaluate('argv[1]')['body']['result'] - self.assertIn('foobar', argv1) - - # We verify we were able to set the environment - env = self.vscode.request_evaluate('foo')['body']['result'] - self.assertIn('bar', env) diff --git a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c b/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c deleted file mode 100644 index 676bd830e657b4..00000000000000 --- a/lldb/test/API/tools/lldb-vscode/runInTerminal/main.c +++ /dev/null @@ -1,11 +0,0 @@ -#include -#include -#include - -int main(int argc, char *argv[]) { - const char *foo = getenv("FOO"); - for (int counter = 1;; counter++) { - sleep(1); // breakpoint - } - return 0; -} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 044bfd13ec4635..36156ca2c42f94 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -998,44 +998,4 @@ llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { return llvm::json::Value(std::move(object)); } -/// See -/// https://microsoft.github.io/debug-adapter-protocol/specification#Reverse_Requests_RunInTerminal -llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request) { - llvm::json::Object reverse_request; - reverse_request.try_emplace("type", "request"); - reverse_request.try_emplace("command", "runInTerminal"); - - llvm::json::Object run_in_terminal_args; - // This indicates the IDE to open an embedded terminal, instead of opening the - // terminal in a new window. 
- run_in_terminal_args.try_emplace("kind", "integrated"); - - auto launch_request_arguments = launch_request.getObject("arguments"); - std::vector args = GetStrings(launch_request_arguments, "args"); - // The program path must be the first entry in the "args" field - args.insert(args.begin(), - GetString(launch_request_arguments, "program").str()); - run_in_terminal_args.try_emplace("args", args); - - const auto cwd = GetString(launch_request_arguments, "cwd"); - if (!cwd.empty()) - run_in_terminal_args.try_emplace("cwd", cwd); - - // We need to convert the input list of environments variables into a - // dictionary - std::vector envs = GetStrings(launch_request_arguments, "env"); - llvm::json::Object environment; - for (const std::string &env : envs) { - size_t index = env.find("="); - environment.try_emplace(env.substr(0, index), env.substr(index + 1)); - } - run_in_terminal_args.try_emplace("env", - llvm::json::Value(std::move(environment))); - - reverse_request.try_emplace( - "arguments", llvm::json::Value(std::move(run_in_terminal_args))); - return reverse_request; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index 88cbef9e5fdd4d..df4428f390ba2b 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -443,18 +443,6 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); -/// Create a runInTerminal reverse request object -/// -/// \param[in] launch_request -/// The original launch_request object whose fields are used to construct -/// the reverse request object. -/// -/// \return -/// A "runInTerminal" JSON object that follows the specification outlined by -/// Microsoft. 
-llvm::json::Object -CreateRunInTerminalReverseRequest(const llvm::json::Object &launch_request); - } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index d57330ce6ff1ae..537cae7868631e 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -38,8 +38,7 @@ VSCode::VSCode() {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift}, {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}), focus_tid(LLDB_INVALID_THREAD_ID), sent_terminated_event(false), - stop_at_entry(false), is_attach(false), - reverse_request_seq(0), waiting_for_run_in_terminal(false) { + stop_at_entry(false), is_attach(false) { const char *log_file_path = getenv("LLDBVSCODE_LOG"); #if defined(_WIN32) // Windows opens stdout and stdin in text mode which converts \n to 13,10 @@ -363,71 +362,4 @@ void VSCode::SetTarget(const lldb::SBTarget target) { } } -PacketStatus VSCode::GetObject(llvm::json::Object &object) { - std::string json = ReadJSON(); - if (json.empty()) - return PacketStatus::EndOfFile; - - llvm::StringRef json_sref(json); - llvm::Expected json_value = llvm::json::parse(json_sref); - if (!json_value) { - auto error = json_value.takeError(); - if (log) { - std::string error_str; - llvm::raw_string_ostream strm(error_str); - strm << error; - strm.flush(); - *log << "error: failed to parse JSON: " << error_str << std::endl - << json << std::endl; - } - return PacketStatus::JSONMalformed; - } - object = *json_value->getAsObject(); - if (!json_value->getAsObject()) { - if (log) - *log << "error: json packet isn't a object" << std::endl; - return PacketStatus::JSONNotObject; - } - return PacketStatus::Success; -} - -bool VSCode::HandleObject(const llvm::json::Object &object) { - const auto packet_type = GetString(object, "type"); - if (packet_type == "request") { - const auto command = GetString(object, "command"); - auto handler_pos = request_handlers.find(std::string(command)); - if (handler_pos != request_handlers.end()) { - handler_pos->second(object); - return true; // Success - } else { - if (log) - *log << "error: unhandled command \"" << command.data() << std::endl; - return false; // Fail - } - } - return false; -} - -PacketStatus VSCode::SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response) { - request.try_emplace("seq", ++reverse_request_seq); - SendJSON(llvm::json::Value(std::move(request))); - while (true) { - PacketStatus status = GetObject(response); - const auto packet_type = GetString(response, "type"); - if (packet_type == "response") - return status; - else { - // Not our response, we got another packet - HandleObject(response); - } - } - return PacketStatus::EndOfFile; -} - -void VSCode::RegisterRequestCallback(std::string request, - RequestCallback callback) { - request_handlers[request] = callback; -} - } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/VSCode.h b/lldb/tools/lldb-vscode/VSCode.h index 4a20c56c53eb0e..88a0c08de24547 100644 --- a/lldb/tools/lldb-vscode/VSCode.h +++ b/lldb/tools/lldb-vscode/VSCode.h @@ -9,7 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_VSCODE_VSCODE_H #define LLDB_TOOLS_LLDB_VSCODE_VSCODE_H -#include #include #include #include @@ -20,7 +19,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" @@ -67,15 +65,6 @@ enum class OutputType { Console, Stdout, Stderr, Telemetry }; enum 
VSCodeBroadcasterBits { eBroadcastBitStopEventThread = 1u << 0 }; -typedef void (*RequestCallback)(const llvm::json::Object &command); - -enum class PacketStatus { - Success = 0, - EndOfFile, - JSONMalformed, - JSONNotObject -}; - struct VSCode { InputStream input; OutputStream output; @@ -102,10 +91,6 @@ struct VSCode { bool sent_terminated_event; bool stop_at_entry; bool is_attach; - uint32_t reverse_request_seq; - std::map request_handlers; - std::condition_variable request_in_terminal_cv; - bool waiting_for_run_in_terminal; // Keep track of the last stop thread index IDs as threads won't go away // unless we send a "thread" event to indicate the thread exited. llvm::DenseSet thread_ids; @@ -167,36 +152,6 @@ struct VSCode { /// Set given target object as a current target for lldb-vscode and start /// listeing for its breakpoint events. void SetTarget(const lldb::SBTarget target); - - const std::map &GetRequestHandlers(); - - PacketStatus GetObject(llvm::json::Object &object); - bool HandleObject(const llvm::json::Object &object); - - /// Send a Debug Adapter Protocol reverse request to the IDE - /// - /// \param[in] request - /// The payload of the request to send. - /// - /// \param[out] response - /// The response of the IDE. It might be undefined if there was an error. - /// - /// \return - /// A \a PacketStatus object indicating the sucess or failure of the - /// request. - PacketStatus SendReverseRequest(llvm::json::Object request, - llvm::json::Object &response); - - /// Registers a callback handler for a Debug Adapter Protocol request - /// - /// \param[in] request - /// The name of the request following the Debug Adapter Protocol - /// specification. - /// - /// \param[in] callback - /// The callback to execute when the given request is triggered by the - /// IDE. - void RegisterRequestCallback(std::string request, RequestCallback callback); }; extern VSCode g_vsc; diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index ee01822ba62170..54f2e653d06970 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -384,12 +384,7 @@ void EventThreadFunction() { break; case lldb::eStateSuspended: break; - case lldb::eStateStopped: { - if (g_vsc.waiting_for_run_in_terminal) { - g_vsc.waiting_for_run_in_terminal = false; - g_vsc.request_in_terminal_cv.notify_one(); - } - } + case lldb::eStateStopped: // Only report a stopped event if the process was not restarted. if (!lldb::SBProcess::GetRestartedFromEvent(event)) { SendStdOutStdErr(process); @@ -1379,9 +1374,6 @@ void request_initialize(const llvm::json::Object &request) { filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp)); } body.try_emplace("exceptionBreakpointFilters", std::move(filters)); - // The debug adapter supports launching a debugee in intergrated VSCode - // terminal. - body.try_emplace("supportsRunInTerminalRequest", true); // The debug adapter supports stepping back via the stepBack and // reverseContinue requests. body.try_emplace("supportsStepBack", false); @@ -1441,49 +1433,6 @@ void request_initialize(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void request_runInTerminal(const llvm::json::Object &launch_request, - llvm::json::Object &launch_response) { - // We have already created a target that has a valid "program" path to the - // executable. We will attach to the next process whose name matches that - // of the target's. 
- g_vsc.is_attach = true; - lldb::SBAttachInfo attach_info; - lldb::SBError error; - attach_info.SetWaitForLaunch(true, /*async*/ true); - g_vsc.target.Attach(attach_info, error); - - llvm::json::Object reverse_request = - CreateRunInTerminalReverseRequest(launch_request); - llvm::json::Object reverse_response; - lldb_vscode::PacketStatus status = - g_vsc.SendReverseRequest(reverse_request, reverse_response); - if (status != lldb_vscode::PacketStatus::Success) - error.SetErrorString("Process cannot be launched by IDE."); - - if (error.Success()) { - // Wait for the attach stop event to happen or for a timeout. - g_vsc.waiting_for_run_in_terminal = true; - static std::mutex mutex; - std::unique_lock locker(mutex); - g_vsc.request_in_terminal_cv.wait_for(locker, std::chrono::seconds(10)); - - auto attached_pid = g_vsc.target.GetProcess().GetProcessID(); - if (attached_pid == LLDB_INVALID_PROCESS_ID) - error.SetErrorString("Failed to attach to a process"); - else - SendProcessEvent(Attach); - } - - if (error.Fail()) { - launch_response["success"] = llvm::json::Value(false); - EmplaceSafeString(launch_response, "message", - std::string(error.GetCString())); - } else { - launch_response["success"] = llvm::json::Value(true); - g_vsc.SendJSON(CreateEventObject("initialized")); - } -} - // "LaunchRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -1556,12 +1505,6 @@ void request_launch(const llvm::json::Object &request) { return; } - if (GetBoolean(arguments, "runInTerminal", false)) { - request_runInTerminal(request, response); - g_vsc.SendJSON(llvm::json::Value(std::move(response))); - return; - } - // Instantiate a launch info instance for the target. auto launch_info = g_vsc.target.GetLaunchInfo(); @@ -2888,35 +2831,39 @@ void request__testGetTargetBreakpoints(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } -void RegisterRequestCallbacks() { - g_vsc.RegisterRequestCallback("attach", request_attach); - g_vsc.RegisterRequestCallback("completions", request_completions); - g_vsc.RegisterRequestCallback("continue", request_continue); - g_vsc.RegisterRequestCallback("configurationDone", request_configurationDone); - g_vsc.RegisterRequestCallback("disconnect", request_disconnect); - g_vsc.RegisterRequestCallback("evaluate", request_evaluate); - g_vsc.RegisterRequestCallback("exceptionInfo", request_exceptionInfo); - g_vsc.RegisterRequestCallback("getCompileUnits", request_getCompileUnits); - g_vsc.RegisterRequestCallback("initialize", request_initialize); - g_vsc.RegisterRequestCallback("launch", request_launch); - g_vsc.RegisterRequestCallback("next", request_next); - g_vsc.RegisterRequestCallback("pause", request_pause); - g_vsc.RegisterRequestCallback("scopes", request_scopes); - g_vsc.RegisterRequestCallback("setBreakpoints", request_setBreakpoints); - g_vsc.RegisterRequestCallback("setExceptionBreakpoints", - request_setExceptionBreakpoints); - g_vsc.RegisterRequestCallback("setFunctionBreakpoints", - request_setFunctionBreakpoints); - g_vsc.RegisterRequestCallback("setVariable", request_setVariable); - g_vsc.RegisterRequestCallback("source", request_source); - g_vsc.RegisterRequestCallback("stackTrace", request_stackTrace); - g_vsc.RegisterRequestCallback("stepIn", request_stepIn); - g_vsc.RegisterRequestCallback("stepOut", request_stepOut); - g_vsc.RegisterRequestCallback("threads", request_threads); - g_vsc.RegisterRequestCallback("variables", request_variables); - // Testing requests - 
g_vsc.RegisterRequestCallback("_testGetTargetBreakpoints", - request__testGetTargetBreakpoints); +const std::map &GetRequestHandlers() { +#define REQUEST_CALLBACK(name) \ + { #name, request_##name } + static std::map g_request_handlers = { + // VSCode Debug Adaptor requests + REQUEST_CALLBACK(attach), + REQUEST_CALLBACK(completions), + REQUEST_CALLBACK(continue), + REQUEST_CALLBACK(configurationDone), + REQUEST_CALLBACK(disconnect), + REQUEST_CALLBACK(evaluate), + REQUEST_CALLBACK(exceptionInfo), + REQUEST_CALLBACK(getCompileUnits), + REQUEST_CALLBACK(initialize), + REQUEST_CALLBACK(launch), + REQUEST_CALLBACK(next), + REQUEST_CALLBACK(pause), + REQUEST_CALLBACK(scopes), + REQUEST_CALLBACK(setBreakpoints), + REQUEST_CALLBACK(setExceptionBreakpoints), + REQUEST_CALLBACK(setFunctionBreakpoints), + REQUEST_CALLBACK(setVariable), + REQUEST_CALLBACK(source), + REQUEST_CALLBACK(stackTrace), + REQUEST_CALLBACK(stepIn), + REQUEST_CALLBACK(stepOut), + REQUEST_CALLBACK(threads), + REQUEST_CALLBACK(variables), + // Testing requests + REQUEST_CALLBACK(_testGetTargetBreakpoints), + }; +#undef REQUEST_CALLBACK + return g_request_handlers; } } // anonymous namespace @@ -2948,8 +2895,6 @@ int main(int argc, char *argv[]) { // Initialize LLDB first before we do anything. lldb::SBDebugger::Initialize(); - RegisterRequestCallbacks(); - int portno = -1; LLDBVSCodeOptTable T; @@ -2992,17 +2937,49 @@ int main(int argc, char *argv[]) { g_vsc.output.descriptor = StreamDescriptor::from_file(fileno(stdout), false); } + auto request_handlers = GetRequestHandlers(); uint32_t packet_idx = 0; while (!g_vsc.sent_terminated_event) { - llvm::json::Object object; - lldb_vscode::PacketStatus status = g_vsc.GetObject(object); - if (status == lldb_vscode::PacketStatus::EndOfFile) + std::string json = g_vsc.ReadJSON(); + if (json.empty()) break; - if (status != lldb_vscode::PacketStatus::Success) - return 1; // Fatal error - if (!g_vsc.HandleObject(object)) + llvm::StringRef json_sref(json); + llvm::Expected json_value = llvm::json::parse(json_sref); + if (!json_value) { + auto error = json_value.takeError(); + if (g_vsc.log) { + std::string error_str; + llvm::raw_string_ostream strm(error_str); + strm << error; + strm.flush(); + + *g_vsc.log << "error: failed to parse JSON: " << error_str << std::endl + << json << std::endl; + } + return 1; + } + + auto object = json_value->getAsObject(); + if (!object) { + if (g_vsc.log) + *g_vsc.log << "error: json packet isn't a object" << std::endl; return 1; + } + + const auto packet_type = GetString(object, "type"); + if (packet_type == "request") { + const auto command = GetString(object, "command"); + auto handler_pos = request_handlers.find(std::string(command)); + if (handler_pos != request_handlers.end()) { + handler_pos->second(*object); + } else { + if (g_vsc.log) + *g_vsc.log << "error: unhandled command \"" << command.data() + << std::endl; + return 1; + } + } ++packet_idx; } diff --git a/lldb/tools/lldb-vscode/package.json b/lldb/tools/lldb-vscode/package.json index 9077ab51dd7fab..29ca06dd17d636 100644 --- a/lldb/tools/lldb-vscode/package.json +++ b/lldb/tools/lldb-vscode/package.json @@ -175,11 +175,6 @@ "type": "array", "description": "Commands executed at the end of debugging session.", "default": [] - }, - "runInTerminal": { - "type": "boolean", - "description": "Launch the program inside an integrated terminal in the IDE. 
Useful for debugging interactive command line programs", - "default": false } } }, From c05095cd6865a95ee848cd95d11643969a81a241 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 1 Sep 2020 04:49:49 -0700 Subject: [PATCH 117/161] [Asan] Don't crash if metadata is not initialized Fixes https://github.com/google/sanitizers/issues/1193. AsanChunk can be uninitialized yet just after return from the secondary allocator. If lsan starts scan just before metadata assignment it can fail to find corresponding AsanChunk. It should be safe to ignore this and let lsan to assume that AsanChunk is in the beginning of the block. This block is from the secondary allocator and created with mmap, so it should not contain any pointers and will make lsan to miss some leaks. Similar already happens for primary allocator. If it can't find real AsanChunk it falls back and assume that block starts with AsanChunk. Then if the block is already returned to allocator we have garbage in AsanChunk and may scan dead memory hiding some leaks. I'll fix this in D87135. Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86931 --- compiler-rt/lib/asan/asan_allocator.cpp | 22 +++++-------- .../test/asan/TestCases/lsan_crash.cpp | 31 +++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) create mode 100644 compiler-rt/test/asan/TestCases/lsan_crash.cpp diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 7334b7200fc4c8..1d8d5bcad1dc0f 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -730,6 +730,9 @@ struct Allocator { // -------------------------- Chunk lookup ---------------------- // Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg). + // Returns nullptr if AsanChunk is not yet initialized just after + // get_allocator().Allocate(), or is being destroyed just before + // get_allocator().Deallocate(). AsanChunk *GetAsanChunk(void *alloc_beg) { if (!alloc_beg) return nullptr; @@ -1102,26 +1105,17 @@ void GetUserBeginDebug(uptr chunk) { uptr GetUserBegin(uptr chunk) { __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk); - if (!m) { - Printf( - "ASAN is about to crash with a CHECK failure.\n" - "The ASAN developers are trying to chase down this bug,\n" - "so if you've encountered this bug please let us know.\n" - "See also: https://github.com/google/sanitizers/issues/1193\n" - "Internal ref b/149237057\n" - "chunk: %p caller %p __lsan_current_stage %s\n", - chunk, GET_CALLER_PC(), __lsan_current_stage); - GetUserBeginDebug(chunk); - } - CHECK(m); - return m->Beg(); + return m ? m->Beg() : 0; } LsanMetadata::LsanMetadata(uptr chunk) { - metadata_ = reinterpret_cast(chunk - __asan::kChunkHeaderSize); + metadata_ = chunk ? 
reinterpret_cast(chunk - __asan::kChunkHeaderSize) + : nullptr; } bool LsanMetadata::allocated() const { + if (!metadata_) + return false; __asan::AsanChunk *m = reinterpret_cast<__asan::AsanChunk *>(metadata_); return atomic_load(&m->chunk_state, memory_order_relaxed) == __asan::CHUNK_ALLOCATED; diff --git a/compiler-rt/test/asan/TestCases/lsan_crash.cpp b/compiler-rt/test/asan/TestCases/lsan_crash.cpp new file mode 100644 index 00000000000000..23c2569a0b73c8 --- /dev/null +++ b/compiler-rt/test/asan/TestCases/lsan_crash.cpp @@ -0,0 +1,31 @@ +// RUN: %clangxx_asan -O2 %s -o %t && %run %t + +#include +#include +#include +#include +#include + +std::atomic done; + +void foo() { + std::unique_ptr mem; + + while (!done) + mem.reset(new char[1000000]); +} + +int main() { + std::vector threads; + for (int i = 0; i < 10; ++i) + threads.emplace_back(foo); + + for (int i = 0; i < 100; ++i) + __lsan_do_recoverable_leak_check(); + + done = true; + for (auto &t : threads) + t.join(); + + return 0; +} From 27650a5fed14a99b5c3640444abb0012ca28f3fb Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 1 Sep 2020 05:26:53 -0700 Subject: [PATCH 118/161] [NFC][Asan] Remove Debug code Used for https://github.com/google/sanitizers/issues/1193 Reviewed By: morehouse Differential Revision: https://reviews.llvm.org/D86933 --- compiler-rt/lib/asan/asan_allocator.cpp | 38 ------------------- compiler-rt/lib/lsan/lsan_common.cpp | 7 ---- .../sanitizer_allocator_combined.h | 6 --- .../sanitizer_allocator_primary32.h | 1 - .../sanitizer_allocator_primary64.h | 24 ------------ 5 files changed, 76 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 1d8d5bcad1dc0f..a15c569b42ba08 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -750,26 +750,6 @@ struct Allocator { return reinterpret_cast(alloc_beg); } - AsanChunk *GetAsanChunkDebug(void *alloc_beg) { - if (!alloc_beg) - return nullptr; - if (!allocator.FromPrimary(alloc_beg)) { - uptr *meta = reinterpret_cast(allocator.GetMetaData(alloc_beg)); - AsanChunk *m = reinterpret_cast(meta[1]); - Printf("GetAsanChunkDebug1 alloc_beg %p meta %p m %p\n", alloc_beg, meta, - m); - return m; - } - uptr *alloc_magic = reinterpret_cast(alloc_beg); - Printf( - "GetAsanChunkDebug2 alloc_beg %p alloc_magic %p alloc_magic[0] %p " - "alloc_magic[1] %p\n", - alloc_beg, alloc_magic, alloc_magic[0], alloc_magic[1]); - if (alloc_magic[0] == kAllocBegMagic) - return reinterpret_cast(alloc_magic[1]); - return reinterpret_cast(alloc_beg); - } - AsanChunk *GetAsanChunkByAddr(uptr p) { void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast(p)); return GetAsanChunk(alloc_beg); @@ -782,14 +762,6 @@ struct Allocator { return GetAsanChunk(alloc_beg); } - AsanChunk *GetAsanChunkByAddrFastLockedDebug(uptr p) { - void *alloc_beg = - allocator.GetBlockBeginFastLockedDebug(reinterpret_cast(p)); - Printf("GetAsanChunkByAddrFastLockedDebug p %p alloc_beg %p\n", p, - alloc_beg); - return GetAsanChunkDebug(alloc_beg); - } - uptr AllocationSize(uptr p) { AsanChunk *m = GetAsanChunkByAddr(p); if (!m) return 0; @@ -1093,16 +1065,6 @@ uptr PointsIntoChunk(void* p) { return 0; } -// Debug code. Delete once issue #1193 is chased down. 
-extern "C" SANITIZER_WEAK_ATTRIBUTE const char *__lsan_current_stage; - -void GetUserBeginDebug(uptr chunk) { - Printf("GetUserBeginDebug1 chunk %p\n", chunk); - __asan::AsanChunk *m = - __asan::instance.GetAsanChunkByAddrFastLockedDebug(chunk); - Printf("GetUserBeginDebug2 m %p\n", m); -} - uptr GetUserBegin(uptr chunk) { __asan::AsanChunk *m = __asan::instance.GetAsanChunkByAddrFastLocked(chunk); return m ? m->Beg() : 0; diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 93ce0ddc3d68e5..41b5ae5483299b 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -25,8 +25,6 @@ #include "sanitizer_common/sanitizer_thread_registry.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" -extern "C" const char *__lsan_current_stage = "unknown"; - #if CAN_SANITIZE_LEAKS namespace __lsan { @@ -362,7 +360,6 @@ static void FloodFillTag(Frontier *frontier, ChunkTag tag) { // ForEachChunk callback. If the chunk is marked as leaked, marks all chunks // which are reachable from it as indirectly leaked. static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { - __lsan_current_stage = "MarkIndirectlyLeakedCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable) { @@ -375,7 +372,6 @@ static void MarkIndirectlyLeakedCb(uptr chunk, void *arg) { // frontier. static void CollectIgnoredCb(uptr chunk, void *arg) { CHECK(arg); - __lsan_current_stage = "CollectIgnoredCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() == kIgnored) { @@ -405,7 +401,6 @@ struct InvalidPCParam { static void MarkInvalidPCCb(uptr chunk, void *arg) { CHECK(arg); InvalidPCParam *param = reinterpret_cast(arg); - __lsan_current_stage = "MarkInvalidPCCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kReachable && m.tag() != kIgnored) { @@ -481,7 +476,6 @@ static void ClassifyAllChunks(SuspendedThreadsList const &suspended_threads, // ForEachChunk callback. Resets the tags to pre-leak-check state. 
static void ResetTagsCb(uptr chunk, void *arg) { (void)arg; - __lsan_current_stage = "ResetTagsCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() != kIgnored) @@ -498,7 +492,6 @@ static void PrintStackTraceById(u32 stack_trace_id) { static void CollectLeaksCb(uptr chunk, void *arg) { CHECK(arg); LeakReport *leak_report = reinterpret_cast(arg); - __lsan_current_stage = "CollectLeaksCb"; chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (!m.allocated()) return; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h index 0cf483da1e5c8c..33f89d6d49928c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_combined.h @@ -142,12 +142,6 @@ class CombinedAllocator { return secondary_.GetBlockBeginFastLocked(p); } - void *GetBlockBeginFastLockedDebug(void *p) { - if (primary_.PointerIsMine(p)) - return primary_.GetBlockBeginDebug(p); - return secondary_.GetBlockBeginFastLocked(p); - } - uptr GetActuallyAllocatedSize(void *p) { if (primary_.PointerIsMine(p)) return primary_.GetActuallyAllocatedSize(p); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h index 2c25a687c5f088..b90dabbf776929 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -211,7 +211,6 @@ class SizeClassAllocator32 { uptr res = beg + (n * (u32)size); return reinterpret_cast(res); } - void *GetBlockBeginDebug(const void *p) { return GetBlockBegin(p); } uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h index a6126fc6265eb5..774c09e4249526 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary64.h @@ -199,30 +199,6 @@ class SizeClassAllocator64 { return nullptr; } - void *GetBlockBeginDebug(const void *p) { - uptr class_id = GetSizeClass(p); - uptr size = ClassIdToSize(class_id); - Printf("GetBlockBeginDebug1 p %p class_id %p size %p\n", p, class_id, size); - if (!size) - return nullptr; - uptr chunk_idx = GetChunkIdx((uptr)p, size); - uptr reg_beg = GetRegionBegin(p); - uptr beg = chunk_idx * size; - uptr next_beg = beg + size; - Printf( - "GetBlockBeginDebug2 chunk_idx %p reg_beg %p beg %p next_beg %p " - "kNumClasses %p\n", - chunk_idx, reg_beg, beg, next_beg, kNumClasses); - if (class_id >= kNumClasses) - return nullptr; - const RegionInfo *region = AddressSpaceView::Load(GetRegionInfo(class_id)); - Printf("GetBlockBeginDebug3 region %p region->mapped_user %p\n", region, - region->mapped_user); - if (region->mapped_user >= next_beg) - return reinterpret_cast(reg_beg + beg); - return nullptr; - } - uptr GetActuallyAllocatedSize(void *p) { CHECK(PointerIsMine(p)); return ClassIdToSize(GetSizeClass(p)); From d183f472617dfedf23381be90612d713d0f439af Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 8 Sep 2020 14:20:41 -0500 Subject: [PATCH 119/161] [Hexagon] Handle widening of truncation's operand with legal result Failing example: v8i8 = truncate v8i32. v8i8 is legal, but v8i32 was widened to HVX. 
Make sure that v8i8 does not get altered (even if it's changed to another legal type). --- llvm/lib/Target/Hexagon/HexagonISelLowering.h | 1 + .../Target/Hexagon/HexagonISelLoweringHVX.cpp | 68 +++++++++++++------ .../Hexagon/autohvx/isel-truncate-legal.ll | 34 ++++++++++ 3 files changed, 84 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 8473515b3c758f..9e7176cd94218c 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -487,6 +487,7 @@ class HexagonTargetLowering : public TargetLowering { findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; + bool shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const; bool isHvxOperation(SDNode *N, SelectionDAG &DAG) const; SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; void LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index e5d05cfe64c47b..22561691f0e027 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1939,16 +1939,36 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); MVT ResTy = ty(Op); MVT OpTy = ty(Op0); + + // .-res, op-> Scalar Illegal HVX + // Scalar ok extract(widen) - + // Illegal - widen widen + // HVX - - ok + if (Subtarget.isHVXVectorType(OpTy)) return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0); + assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?"); + MVT WideOpTy = getWideTy(OpTy); SmallVector Concats = {Op0}; for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i) Concats.push_back(DAG.getUNDEF(OpTy)); SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats); - return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); + SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat); + // If the original result wasn't legal and was supposed to be widened, + // we're done. + if (shouldWidenToHvx(ResTy, DAG)) + return V; + + // The original result type wasn't meant to be widened to HVX, so + // leave it as it is. Standard legalization should be able to deal + // with it (since now it's a result of a target-idendependent ISD + // node). 
+ assert(ResTy.isVector()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, + {V, getZero(dl, MVT::i32, DAG)}); } SDValue @@ -2029,11 +2049,15 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, SDValue Op(N, 0); switch (Opc) { + case ISD::TRUNCATE: { + assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?"); + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); + break; + } case ISD::STORE: { - assert( - getPreferredHvxVectorAction(ty(cast(N)->getValue())) == - TargetLoweringBase::TypeWidenVector && - "Not widening?"); + assert(shouldWidenToHvx(ty(cast(N)->getValue()), DAG) && + "Not widening?"); SDValue Store = WidenHvxStore(SDValue(N, 0), DAG); Results.push_back(Store); break; @@ -2061,12 +2085,12 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, unsigned Opc = N->getOpcode(); SDValue Op(N, 0); switch (Opc) { - case ISD::TRUNCATE: - if (!Subtarget.isHVXVectorType(ty(Op), false)) { - SDValue T = WidenHvxTruncate(Op, DAG); - Results.push_back(T); - } + case ISD::TRUNCATE: { + assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?"); + SDValue T = WidenHvxTruncate(Op, DAG); + Results.push_back(T); break; + } case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); @@ -2103,8 +2127,22 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return SDValue(); } +bool +HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { + assert(!Subtarget.isHVXVectorType(Ty, true)); + auto Action = getPreferredHvxVectorAction(Ty); + if (Action == TargetLoweringBase::TypeWidenVector) { + EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); + assert(WideTy.isSimple()); + return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); + } + return false; +} + bool HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { + if (!Subtarget.useHVXOps()) + return false; // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. auto IsHvxTy = [this](EVT Ty) { @@ -2122,15 +2160,7 @@ HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { if (!Op.getValueType().isSimple()) return false; MVT ValTy = ty(Op); - if (ValTy.isVector()) { - auto Action = getPreferredVectorAction(ValTy); - if (Action == TargetLoweringBase::TypeWidenVector) { - EVT WideTy = getTypeToTransformTo(*DAG.getContext(), ValTy); - assert(WideTy.isSimple()); - return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); - } - } - return false; + return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); }; for (int i = 0, e = N->getNumValues(); i != e; ++i) { diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll new file mode 100644 index 00000000000000..e9c7f9cce771e0 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-truncate-legal.ll @@ -0,0 +1,34 @@ +; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s + +; Truncating a type-to-be-widenened to a legal type (v8i8). +; Check that this compiles successfully. 
+; CHECK-LABEL: f0: +; CHECK: dealloc_return + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @f0(i8* %a0) local_unnamed_addr #0 { +b0: + %v0 = load i8, i8* undef, align 1 + %v1 = zext i8 %v0 to i16 + %v2 = add i16 0, %v1 + %v3 = icmp sgt i16 %v2, 1 + %v4 = select i1 %v3, i16 %v2, i16 1 + %v5 = udiv i16 -32768, %v4 + %v6 = zext i16 %v5 to i32 + %v7 = insertelement <8 x i32> undef, i32 %v6, i32 0 + %v8 = shufflevector <8 x i32> %v7, <8 x i32> undef, <8 x i32> zeroinitializer + %v9 = load <8 x i16>, <8 x i16>* undef, align 2 + %v10 = sext <8 x i16> %v9 to <8 x i32> + %v11 = mul nsw <8 x i32> %v8, %v10 + %v12 = add nsw <8 x i32> %v11, + %v13 = lshr <8 x i32> %v12, + %v14 = trunc <8 x i32> %v13 to <8 x i8> + %v15 = getelementptr inbounds i8, i8* %a0, i32 undef + %v16 = bitcast i8* %v15 to <8 x i8>* + store <8 x i8> %v14, <8 x i8>* %v16, align 1 + ret void +} + +attributes #0 = { "target-features"="+hvx,+hvx-length128b" } From 8893d0816ccdf8998d2e21b5430e9d6abe7ef465 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Wed, 2 Sep 2020 15:33:19 -0700 Subject: [PATCH 120/161] [MLIR] Change Operation::create() methods to use Value/Type/Block ranges. - Introduce a new BlockRange class to represent range of blocks (constructible from an ArrayRef or a SuccessorRange); - Change Operation::create() methods to use TypeRange for result types, ValueRange for operands and BlockRange for successors. Differential Revision: https://reviews.llvm.org/D86985 --- mlir/include/mlir/IR/BlockSupport.h | 41 +++++++++++++++++++++++++ mlir/include/mlir/IR/Operation.h | 14 ++++----- mlir/include/mlir/IR/OperationSupport.h | 8 ++--- mlir/lib/IR/Block.cpp | 28 ++++++++++++++++- mlir/lib/IR/Operation.cpp | 29 +++++++---------- mlir/lib/IR/OperationSupport.cpp | 2 +- 6 files changed, 89 insertions(+), 33 deletions(-) diff --git a/mlir/include/mlir/IR/BlockSupport.h b/mlir/include/mlir/IR/BlockSupport.h index f3dd6140420e48..fc16effbba70da 100644 --- a/mlir/include/mlir/IR/BlockSupport.h +++ b/mlir/include/mlir/IR/BlockSupport.h @@ -75,6 +75,47 @@ class SuccessorRange final friend RangeBaseT; }; +//===----------------------------------------------------------------------===// +// BlockRange +//===----------------------------------------------------------------------===// + +/// This class provides an abstraction over the different types of ranges over +/// Blocks. In many cases, this prevents the need to explicitly materialize a +/// SmallVector/std::vector. This class should be used in places that are not +/// suitable for a more derived type (e.g. ArrayRef) or a template range +/// parameter. +class BlockRange final + : public llvm::detail::indexed_accessor_range_base< + BlockRange, llvm::PointerUnion, + Block *, Block *, Block *> { +public: + using RangeBaseT::RangeBaseT; + BlockRange(ArrayRef blocks = llvm::None); + BlockRange(SuccessorRange successors); + template , Arg>::value>> + BlockRange(Arg &&arg) + : BlockRange(ArrayRef(std::forward(arg))) {} + BlockRange(std::initializer_list blocks) + : BlockRange(ArrayRef(blocks)) {} + +private: + /// The owner of the range is either: + /// * A pointer to the first element of an array of block operands. + /// * A pointer to the first element of an array of Block *. + using OwnerT = llvm::PointerUnion; + + /// See `llvm::detail::indexed_accessor_range_base` for details. 
+ static OwnerT offset_base(OwnerT object, ptrdiff_t index); + + /// See `llvm::detail::indexed_accessor_range_base` for details. + static Block *dereference_iterator(OwnerT object, ptrdiff_t index); + + /// Allow access to `offset_base` and `dereference_iterator`. + friend RangeBaseT; +}; + //===----------------------------------------------------------------------===// // Operation Iterators //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 5f5e9017ae5124..6de7677dbf0528 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -32,25 +32,25 @@ class Operation final public: /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, ArrayRef attributes, - ArrayRef successors, unsigned numRegions); + BlockRange successors, unsigned numRegions); /// Overload of create that takes an existing MutableDictionaryAttr to avoid /// unnecessarily uniquing a list of attributes. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, unsigned numRegions); + BlockRange successors, unsigned numRegions); /// Create a new Operation from the fields stored in `state`. static Operation *create(const OperationState &state); /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, - ArrayRef resultTypes, ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors = {}, + BlockRange successors = {}, RegionRange regions = {}); /// The name of an operation is the key identifier for it. @@ -633,7 +633,7 @@ class Operation final bool hasValidOrder() { return orderIndex != kInvalidOrderIdx; } private: - Operation(Location location, OperationName name, ArrayRef resultTypes, + Operation(Location location, OperationName name, TypeRange resultTypes, unsigned numSuccessors, unsigned numRegions, const MutableDictionaryAttr &attributes, bool hasOperandStorage); diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 7fce4b808d2e44..11e85f20af4458 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -29,6 +29,7 @@ namespace mlir { class Block; +class BlockRange; class Dialect; class Operation; struct OperationState; @@ -42,7 +43,6 @@ class Pattern; class Region; class ResultRange; class RewritePattern; -class SuccessorRange; class Type; class Value; class ValueRange; @@ -394,12 +394,8 @@ struct OperationState { attributes.append(newAttributes); } - /// Add an array of successors. - void addSuccessors(ArrayRef newSuccessors) { - successors.append(newSuccessors.begin(), newSuccessors.end()); - } void addSuccessors(Block *successor) { successors.push_back(successor); } - void addSuccessors(SuccessorRange newSuccessors); + void addSuccessors(BlockRange newSuccessors); /// Create a region that should be attached to the operation. 
These regions /// can be filled in immediately without waiting for Operation to be diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp index 71f368c49776e9..e039b41ae4b779 100644 --- a/mlir/lib/IR/Block.cpp +++ b/mlir/lib/IR/Block.cpp @@ -282,7 +282,7 @@ unsigned PredecessorIterator::getSuccessorIndex() const { } //===----------------------------------------------------------------------===// -// Successors +// SuccessorRange //===----------------------------------------------------------------------===// SuccessorRange::SuccessorRange(Block *block) : SuccessorRange(nullptr, 0) { @@ -295,3 +295,29 @@ SuccessorRange::SuccessorRange(Operation *term) : SuccessorRange(nullptr, 0) { if ((count = term->getNumSuccessors())) base = term->getBlockOperands().data(); } + +//===----------------------------------------------------------------------===// +// BlockRange +//===----------------------------------------------------------------------===// + +BlockRange::BlockRange(ArrayRef blocks) : BlockRange(nullptr, 0) { + if ((count = blocks.size())) + base = blocks.data(); +} + +BlockRange::BlockRange(SuccessorRange successors) + : BlockRange(successors.begin().getBase(), successors.size()) {} + +/// See `llvm::detail::indexed_accessor_range_base` for details. +BlockRange::OwnerT BlockRange::offset_base(OwnerT object, ptrdiff_t index) { + if (auto *operand = object.dyn_cast()) + return {operand + index}; + return {object.dyn_cast() + index}; +} + +/// See `llvm::detail::indexed_accessor_range_base` for details. +Block *BlockRange::dereference_iterator(OwnerT object, ptrdiff_t index) { + if (const auto *operand = object.dyn_cast()) + return operand[index].get(); + return object.dyn_cast()[index]; +} diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index b8f9e6c9fdfc4e..f531a6097c257b 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -71,29 +71,24 @@ OperationName OperationName::getFromOpaquePointer(void *pointer) { /// Create a new Operation with the specific fields. Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, ArrayRef attributes, - ArrayRef successors, - unsigned numRegions) { + BlockRange successors, unsigned numRegions) { return create(location, name, resultTypes, operands, MutableDictionaryAttr(attributes), successors, numRegions); } /// Create a new Operation from operation state. Operation *Operation::create(const OperationState &state) { - return Operation::create(state.location, state.name, state.types, - state.operands, state.attributes, state.successors, - state.regions); + return create(state.location, state.name, state.types, state.operands, + state.attributes, state.successors, state.regions); } /// Create a new Operation with the specific fields. Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, - RegionRange regions) { + BlockRange successors, RegionRange regions) { unsigned numRegions = regions.size(); Operation *op = create(location, name, resultTypes, operands, attributes, successors, numRegions); @@ -106,11 +101,9 @@ Operation *Operation::create(Location location, OperationName name, /// Overload of create that takes an existing MutableDictionaryAttr to avoid /// unnecessarily uniquing a list of attributes. 
Operation *Operation::create(Location location, OperationName name, - ArrayRef resultTypes, - ArrayRef operands, + TypeRange resultTypes, ValueRange operands, MutableDictionaryAttr attributes, - ArrayRef successors, - unsigned numRegions) { + BlockRange successors, unsigned numRegions) { // We only need to allocate additional memory for a subset of results. unsigned numTrailingResults = OpResult::getNumTrailing(resultTypes.size()); unsigned numInlineResults = OpResult::getNumInline(resultTypes.size()); @@ -167,7 +160,7 @@ Operation *Operation::create(Location location, OperationName name, } Operation::Operation(Location location, OperationName name, - ArrayRef resultTypes, unsigned numSuccessors, + TypeRange resultTypes, unsigned numSuccessors, unsigned numRegions, const MutableDictionaryAttr &attributes, bool hasOperandStorage) @@ -611,8 +604,8 @@ Operation *Operation::cloneWithoutRegions(BlockAndValueMapping &mapper) { successors.push_back(mapper.lookupOrDefault(successor)); // Create the new operation. - auto *newOp = Operation::create(getLoc(), getName(), getResultTypes(), - operands, attrs, successors, getNumRegions()); + auto *newOp = create(getLoc(), getName(), getResultTypes(), operands, attrs, + successors, getNumRegions()); // Remember the mapping of any results. for (unsigned i = 0, e = getNumResults(); i != e; ++i) diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp index ab84f4e8cf178a..69aea3bfcf1986 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -186,7 +186,7 @@ void OperationState::addOperands(ValueRange newOperands) { operands.append(newOperands.begin(), newOperands.end()); } -void OperationState::addSuccessors(SuccessorRange newSuccessors) { +void OperationState::addSuccessors(BlockRange newSuccessors) { successors.append(newSuccessors.begin(), newSuccessors.end()); } From 76a2c434f2c35fb27913bf59e0acb0435e59f079 Mon Sep 17 00:00:00 2001 From: Nate Voorhies Date: Tue, 8 Sep 2020 14:19:00 -0700 Subject: [PATCH 121/161] Insert missing bracket in docs. Body of unrolled loop was missing opening bracket. Reviewed By: Meinersbur Differential Revision: https://reviews.llvm.org/D87329 --- llvm/docs/TransformMetadata.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/TransformMetadata.rst b/llvm/docs/TransformMetadata.rst index 817b41b43711d1..3c0e10b3eb7a52 100644 --- a/llvm/docs/TransformMetadata.rst +++ b/llvm/docs/TransformMetadata.rst @@ -196,7 +196,7 @@ is transformed into (using an unroll factor of 4): .. code-block:: c int i = 0; - for (; i + 3 < n; i+=4) // unrolled loop + for (; i + 3 < n; i+=4) { // unrolled loop Stmt(i); Stmt(i+1); Stmt(i+2); From b1e68f885b550cf006f5d84b43aa3a0b2905d4b3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Sep 2020 15:09:35 -0700 Subject: [PATCH 122/161] [SelectionDAGBuilder] Pass fast math flags to getNode calls rather than trying to set them after the fact.: This removes the after the fact FMF handling from D46854 in favor of passing fast math flags to getNode. This should be a superset of D87130. This required adding a SDNodeFlags to SelectionDAG::getSetCC. Now we manage to contant fold some stuff undefs during the initial getNode that we don't do in later DAG combines. 
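For illustration, the shape of the change at a typical call site is roughly
the following (a minimal sketch in the context of a SelectionDAGBuilder visit
function; exact variable names and surrounding code differ from the patch):

  // Gather the IR-level fast math flags up front and hand them to getNode,
  // rather than mutating the node's flags after it has been created.
  SDNodeFlags Flags;
  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp);

  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));
  // With the flags available during the initial getNode call, the folding
  // performed there (e.g. involving undef operands) can already honor them.
  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
                            Op1, Op2, Flags);
  setValue(&I, Res);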
Differential Revision: https://reviews.llvm.org/D87200 --- llvm/include/llvm/CodeGen/SelectionDAG.h | 6 +- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 69 ++------ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 11 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 150 +++++++++--------- .../SelectionDAG/SelectionDAGBuilder.h | 7 - .../CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/AArch64/fp-const-fold.ll | 16 -- llvm/test/CodeGen/PowerPC/fmf-propagation.ll | 4 +- llvm/test/CodeGen/SystemZ/fp-mul-14.ll | 3 - .../test/CodeGen/Thumb2/mve-vecreduce-fadd.ll | 76 ++------- llvm/test/CodeGen/X86/fp-undef.ll | 25 --- 15 files changed, 130 insertions(+), 263 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 5607e785e349a2..8db5249743064f 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1049,8 +1049,8 @@ class SelectionDAG { /// Helper function to make it easier to build SetCC's if you just have an /// ISD::CondCode instead of an SDValue. SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, - ISD::CondCode Cond, SDValue Chain = SDValue(), - bool IsSignaling = false) { + ISD::CondCode Cond, SDNodeFlags Flags = SDNodeFlags(), + SDValue Chain = SDValue(), bool IsSignaling = false) { assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() && "Cannot compare scalars to vectors"); assert(LHS.getValueType().isVector() == VT.isVector() && @@ -1060,7 +1060,7 @@ class SelectionDAG { if (Chain) return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL, {VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)}); - return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond)); + return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Flags); } /// Helper function to make it easier to build Select's if you just have diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6eef79162f8a79..fa150831bdbd03 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -357,10 +357,6 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - // This bit is used to determine if the flags are in a defined state. It is - // only used by SelectionDAGBuilder. - bool AnyDefined : 1; - bool NoUnsignedWrap : 1; bool NoSignedWrap : 1; bool Exact : 1; @@ -382,9 +378,8 @@ struct SDNodeFlags { public: /// Default constructor turns off all optimization flags. SDNodeFlags() - : AnyDefined(false), NoUnsignedWrap(false), NoSignedWrap(false), - Exact(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), + : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false), + NoInfs(false), NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false) {} @@ -399,56 +394,18 @@ struct SDNodeFlags { setAllowReassociation(FPMO.hasAllowReassoc()); } - /// Sets the state of the flags to the defined state. - void setDefined() { AnyDefined = true; } - /// Returns true if the flags are in a defined state. - bool isDefined() const { return AnyDefined; } - // These are mutators for each flag. 
- void setNoUnsignedWrap(bool b) { - setDefined(); - NoUnsignedWrap = b; - } - void setNoSignedWrap(bool b) { - setDefined(); - NoSignedWrap = b; - } - void setExact(bool b) { - setDefined(); - Exact = b; - } - void setNoNaNs(bool b) { - setDefined(); - NoNaNs = b; - } - void setNoInfs(bool b) { - setDefined(); - NoInfs = b; - } - void setNoSignedZeros(bool b) { - setDefined(); - NoSignedZeros = b; - } - void setAllowReciprocal(bool b) { - setDefined(); - AllowReciprocal = b; - } - void setAllowContract(bool b) { - setDefined(); - AllowContract = b; - } - void setApproximateFuncs(bool b) { - setDefined(); - ApproximateFuncs = b; - } - void setAllowReassociation(bool b) { - setDefined(); - AllowReassociation = b; - } - void setNoFPExcept(bool b) { - setDefined(); - NoFPExcept = b; - } + void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } + void setNoSignedWrap(bool b) { NoSignedWrap = b; } + void setExact(bool b) { Exact = b; } + void setNoNaNs(bool b) { NoNaNs = b; } + void setNoInfs(bool b) { NoInfs = b; } + void setNoSignedZeros(bool b) { NoSignedZeros = b; } + void setAllowReciprocal(bool b) { AllowReciprocal = b; } + void setAllowContract(bool b) { AllowContract = b; } + void setApproximateFuncs(bool b) { ApproximateFuncs = b; } + void setAllowReassociation(bool b) { AllowReassociation = b; } + void setNoFPExcept(bool b) { NoFPExcept = b; } // These are accessors for each flag. bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 37d8cdd695445f..e5c5e5341a6801 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7398,9 +7398,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (N0.hasOneUse()) { // FIXME Can we handle multiple uses? Could we token factor the chain // results from the new/old setcc? - SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, - N0.getOperand(0), - N0Opcode == ISD::STRICT_FSETCCS); + SDValue SetCC = + DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, SDNodeFlags(), + N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS); CombineTo(N, SetCC); DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); recursivelyDeleteUnusedNodes(N0.getNode()); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f6e4b9363d1a13..7751ebb7705a3f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1735,12 +1735,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode( if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, SDNodeFlags(), Chain, + IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, SDNodeFlags(), Chain, + IsSignaling); } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 09b5f14bdb7b45..2399525de66591 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1777,17 +1777,18 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // The following can be improved, but not that much. SDValue Tmp1, Tmp2, Tmp3, OutputChain; Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETOEQ, Chain, IsSignaling); + RHSHi, ISD::SETOEQ, SDNodeFlags(), Chain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, - RHSLo, CCCode, OutputChain, IsSignaling); + RHSLo, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, ISD::SETUNE, OutputChain, IsSignaling); + Tmp1 = + DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, + ISD::SETUNE, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp1->getNumValues() > 1 ? Tmp1.getValue(1) : SDValue(); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, - RHSHi, CCCode, OutputChain, IsSignaling); + RHSHi, CCCode, SDNodeFlags(), OutputChain, IsSignaling); OutputChain = Tmp2->getNumValues() > 1 ? Tmp2.getValue(1) : SDValue(); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 5e6cb03f3839c8..2d42eb73606639 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1120,27 +1120,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (auto *FPMO = dyn_cast(&I)) { - // ConstrainedFPIntrinsics handle their own FMF. - if (!isa(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. 
- // TODO: The handling of flags should be improved, see - // https://reviews.llvm.org/D86871 - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - } - if (!I.isTerminator() && !HasTailCall && !isa(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -3023,9 +3002,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); } - if (auto *ExactOp = dyn_cast(&I)) { + if (auto *ExactOp = dyn_cast(&I)) Flags.setExact(ExactOp->isExact()); - } + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3135,13 +3115,16 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - auto *FPMO = dyn_cast(&I); - if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) + auto *FPMO = cast(&I); + if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + SDNodeFlags Flags; + Flags.copyFMF(*FPMO); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); - setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition, Flags)); } // Check if the condition of the select has one use or two users that are both @@ -3169,6 +3152,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) { bool IsUnaryAbs = false; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); + // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { EVT VT = ValueVTs[0]; @@ -3272,7 +3259,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode( OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags); } } @@ -4876,7 +4863,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4892,13 +4879,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4991,13 +4978,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. 
- return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5088,13 +5075,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5178,25 +5165,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. - return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const TargetLowering &TLI) { + SelectionDAG &DAG, const TargetLowering &TLI, + SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -5219,7 +5207,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, } // No special expansion. - return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. @@ -5640,6 +5628,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DebugLoc dl = getCurDebugLoc(); SDValue Res; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast(&I)) + Flags.copyFMF(*FPOp); + switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. 
@@ -6054,23 +6046,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -6103,7 +6098,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: @@ -6128,38 +6123,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + setValue(&I, DAG.getNode( + ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: @@ -6174,17 +6168,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls 
should have fast-math-flags. - SDValue Mul = DAG.getNode(ISD::FMUL, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + SDValue Mul = DAG.getNode( + ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), - Mul, - getValue(I.getArgOperand(2))); + Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; @@ -7532,8 +7524,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, + DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } @@ -7548,10 +7544,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast(I)); + SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } @@ -8952,23 +8951,28 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; FastMathFlags FMF; - if (isa(I)) - FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast(&I)) { + FMF = FPMO->getFastMathFlags(); + SDFlags.copyFMF(*FPMO); + } switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_v2_fadd: if (FMF.allowReassoc()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_v2_fmul: if (FMF.allowReassoc()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); @@ -8998,10 +9002,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 7bad0551981408..e51e7bf89f8e7b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -518,13 +518,6 @@ class SelectionDAGBuilder { SDValue 
getValue(const Value *V); - /// Return the SDNode for the specified IR value if it exists. - SDNode *getNodeForIRValue(const Value *V) { - if (NodeMap.find(V) == NodeMap.end()) - return nullptr; - return NodeMap[V].getNode(); - } - SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ae98edb74466da..cbdd027f55fef3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6409,7 +6409,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, SDValue Sel; if (Node->isStrictFPOpcode()) { - Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, SDNodeFlags(), Node->getOperand(0), /*IsSignaling*/ true); Chain = Sel.getValue(1); } else { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index b213abb57aa833..f6b5d2ea987f8c 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8219,8 +8219,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); EVT DstSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); - SDValue Sel = - DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); + SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + SDNodeFlags(), Chain, true); Chain = Sel.getValue(1); SDValue FltOfs = DAG.getSelect( diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2c7c36325f1469..1cd928c1de1204 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20345,7 +20345,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, *DAG.getContext(), TheVT); SDValue Cmp; if (IsStrict) { - Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, + Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETLT, SDNodeFlags(), Chain, /*IsSignaling*/ true); Chain = Cmp.getValue(1); } else { diff --git a/llvm/test/CodeGen/AArch64/fp-const-fold.ll b/llvm/test/CodeGen/AArch64/fp-const-fold.ll index b282c8719ff637..dc3f71001d610b 100644 --- a/llvm/test/CodeGen/AArch64/fp-const-fold.ll +++ b/llvm/test/CodeGen/AArch64/fp-const-fold.ll @@ -161,49 +161,33 @@ define double @fmul_nnan_inf_op1(double %x) { ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op0(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_nnan_undef_op1(double %x) { ; CHECK-LABEL: fdiv_nnan_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv nnan double %x, undef ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op0(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double undef, %x ret double %r } -; TODO: Should simplify to undef - define double @fdiv_ninf_undef_op1(double %x) { ; CHECK-LABEL: fdiv_ninf_undef_op1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, 
#9221120237041090560 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %r = fdiv ninf double %x, undef ret double %r diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index 90ea31b26916e1..91745b4b3ea215 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -557,13 +557,13 @@ define double @fcmp_nnan(double %a, double %y, double %z) { ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; FMFDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:' ; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64 ; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1 -; GLOBALDEBUG: f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1 +; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:' declare double @log2(double) diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll index 8bab2135739c44..363511655ad916 100644 --- a/llvm/test/CodeGen/SystemZ/fp-mul-14.ll +++ b/llvm/test/CodeGen/SystemZ/fp-mul-14.ll @@ -2,9 +2,6 @@ ; ; Check that a multiply-and-add results. -; FIXME: This test is xfailed temporarily -; XFAIL: * - define void @f1(float %arg, float* %Dst) { ; CHECK-LABEL: f1: ; CHECK: maeb diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll index a1f25e0f33342e..77f0c77033f95b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-fadd.ll @@ -3,30 +3,11 @@ ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP define arm_aapcs_vfpcc float @fadd_v2f32(<2 x float> %x, float %y) { -; CHECK-FP-LABEL: fadd_v2f32: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-FP-NEXT: vldr s2, .LCPI0_0 -; CHECK-FP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-FP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-FP-NEXT: bx lr -; CHECK-FP-NEXT: .p2align 2 -; CHECK-FP-NEXT: @ %bb.1: -; CHECK-FP-NEXT: .LCPI0_0: -; CHECK-FP-NEXT: .long 0x00000000 @ float 0 -; -; CHECK-NOFP-LABEL: fadd_v2f32: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s1 -; CHECK-NOFP-NEXT: vldr s2, .LCPI0_0 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f32 s0, s4, s0 -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 2 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI0_0: -; CHECK-NOFP-NEXT: .long 0x00000000 @ float 0 +; CHECK-LABEL: fadd_v2f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vadd.f32 s0, s0, s1 +; CHECK-NEXT: vadd.f32 s0, s4, s0 +; CHECK-NEXT: bx lr entry: %z = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v2f32(float %y, <2 x float> %x) ret float %z @@ -80,34 +61,14 @@ entry: } define arm_aapcs_vfpcc void @fadd_v2f16(<2 x half> %x, half* %yy) { -; CHECK-FP-LABEL: fadd_v2f16: -; CHECK-FP: @ %bb.0: @ %entry -; CHECK-FP-NEXT: vmovx.f16 s4, s0 -; 
CHECK-FP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-FP-NEXT: vldr.16 s2, [r0] -; CHECK-FP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-FP-NEXT: vstr.16 s0, [r0] -; CHECK-FP-NEXT: bx lr -; -; CHECK-NOFP-LABEL: fadd_v2f16: -; CHECK-NOFP: @ %bb.0: @ %entry -; CHECK-NOFP-NEXT: vmovx.f16 s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s4 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI3_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vldr.16 s2, [r0] -; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 -; CHECK-NOFP-NEXT: vstr.16 s0, [r0] -; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI3_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 +; CHECK-LABEL: fadd_v2f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovx.f16 s4, s0 +; CHECK-NEXT: vadd.f16 s0, s0, s4 +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vadd.f16 s0, s2, s0 +; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: bx lr entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v2f16(half %y, <2 x half> %x) @@ -134,20 +95,11 @@ define arm_aapcs_vfpcc void @fadd_v4f16(<4 x half> %x, half* %yy) { ; CHECK-NOFP-NEXT: vadd.f16 s4, s0, s4 ; CHECK-NOFP-NEXT: vmovx.f16 s0, s1 ; CHECK-NOFP-NEXT: vadd.f16 s4, s4, s1 -; CHECK-NOFP-NEXT: vldr.16 s2, .LCPI4_0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 -; CHECK-NOFP-NEXT: vadd.f16 s0, s0, s2 ; CHECK-NOFP-NEXT: vldr.16 s2, [r0] +; CHECK-NOFP-NEXT: vadd.f16 s0, s4, s0 ; CHECK-NOFP-NEXT: vadd.f16 s0, s2, s0 ; CHECK-NOFP-NEXT: vstr.16 s0, [r0] ; CHECK-NOFP-NEXT: bx lr -; CHECK-NOFP-NEXT: .p2align 1 -; CHECK-NOFP-NEXT: @ %bb.1: -; CHECK-NOFP-NEXT: .LCPI4_0: -; CHECK-NOFP-NEXT: .short 0x0000 @ half 0 entry: %y = load half, half* %yy %z = call fast half @llvm.experimental.vector.reduce.v2.fadd.f16.v4f16(half %y, <4 x half> %x) diff --git a/llvm/test/CodeGen/X86/fp-undef.ll b/llvm/test/CodeGen/X86/fp-undef.ll index d46bea703fdf0b..95049d16a7bf44 100644 --- a/llvm/test/CodeGen/X86/fp-undef.ll +++ b/llvm/test/CodeGen/X86/fp-undef.ll @@ -100,7 +100,6 @@ define float @frem_undef_op1(float %x) { define float @fadd_undef_op0_nnan(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, %x ret float %r @@ -109,7 +108,6 @@ define float @fadd_undef_op0_nnan(float %x) { define float @fadd_undef_op1_fast(float %x) { ; ANY-LABEL: fadd_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd fast float %x, undef ret float %r @@ -118,7 +116,6 @@ define float @fadd_undef_op1_fast(float %x) { define float @fsub_undef_op0_fast(float %x) { ; ANY-LABEL: fsub_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, %x ret float %r @@ -127,7 +124,6 @@ define float @fsub_undef_op0_fast(float %x) { define float @fsub_undef_op1_nnan(float %x) { ; ANY-LABEL: fsub_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub nnan float %x, undef ret float %r @@ -136,7 +132,6 @@ define float @fsub_undef_op1_nnan(float %x) { define float @fmul_undef_op0_nnan(float %x) { ; 
ANY-LABEL: fmul_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, %x ret float %r @@ -145,7 +140,6 @@ define float @fmul_undef_op0_nnan(float %x) { define float @fmul_undef_op1_fast(float %x) { ; ANY-LABEL: fmul_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul fast float %x, undef ret float %r @@ -154,7 +148,6 @@ define float @fmul_undef_op1_fast(float %x) { define float @fdiv_undef_op0_fast(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, %x ret float %r @@ -163,7 +156,6 @@ define float @fdiv_undef_op0_fast(float %x) { define float @fdiv_undef_op1_nnan(float %x) { ; ANY-LABEL: fdiv_undef_op1_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv nnan float %x, undef ret float %r @@ -172,7 +164,6 @@ define float @fdiv_undef_op1_nnan(float %x) { define float @frem_undef_op0_nnan(float %x) { ; ANY-LABEL: frem_undef_op0_nnan: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, %x ret float %r @@ -181,7 +172,6 @@ define float @frem_undef_op0_nnan(float %x) { define float @frem_undef_op1_fast(float %x) { ; ANY-LABEL: frem_undef_op1_fast: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem fast float %x, undef ret float %r @@ -234,7 +224,6 @@ define double @frem_undef_undef(double %x) { define float @fadd_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fadd_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fadd nnan float undef, 1.0 ret float %r @@ -252,7 +241,6 @@ define float @fadd_undef_op1_constant(float %x) { define float @fsub_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fsub_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fsub fast float undef, 3.0 ret float %r @@ -270,7 +258,6 @@ define float @fsub_undef_op1_constant(float %x) { define float @fmul_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: fmul_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fmul nnan float undef, 5.0 ret float %r @@ -288,7 +275,6 @@ define float @fmul_undef_op1_constant(float %x) { define float @fdiv_undef_op0_fast_constant(float %x) { ; ANY-LABEL: fdiv_undef_op0_fast_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = fdiv fast float undef, 7.0 ret float %r @@ -306,7 +292,6 @@ define float @fdiv_undef_op1_constant(float %x) { define float @frem_undef_op0_nnan_constant(float %x) { ; ANY-LABEL: frem_undef_op0_nnan_constant: ; ANY: # %bb.0: -; ANY-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; ANY-NEXT: retq %r = frem nnan float undef, 9.0 ret float %r @@ -335,7 +320,6 @@ define double @fadd_undef_op0_constant_nan(double %x) { define double @fadd_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000001, undef ret double %r @@ -353,7 +337,6 @@ define double @fsub_undef_op0_constant_nan(double %x) { define double @fsub_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: 
fsub_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub nnan double 0x7FF0000000000011, undef ret double %r @@ -371,7 +354,6 @@ define double @fmul_undef_op0_constant_nan(double %x) { define double @fmul_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000101, undef ret double %r @@ -389,7 +371,6 @@ define double @fdiv_undef_op0_constant_nan(double %x) { define double @fdiv_undef_op1_nnan_constant_nan(double %x) { ; ANY-LABEL: fdiv_undef_op1_nnan_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv nnan double 0x7FF0000000000111, undef ret double %r @@ -407,7 +388,6 @@ define double @frem_undef_op0_constant_nan(double %x) { define double @frem_undef_op1_fast_constant_nan(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_nan: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000001001, undef ret double %r @@ -427,7 +407,6 @@ define double @fadd_undef_op0_constant_inf(double %x) { define double @fadd_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fadd_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fadd fast double 0xFFF0000000000000, undef ret double %r @@ -445,7 +424,6 @@ define double @fsub_undef_op0_constant_inf(double %x) { define double @fsub_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fsub_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fsub ninf double 0x7FF0000000000000, undef ret double %r @@ -463,7 +441,6 @@ define double @fmul_undef_op0_constant_inf(double %x) { define double @fmul_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: fmul_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fmul fast double 0xFFF0000000000000, undef ret double %r @@ -481,7 +458,6 @@ define double @fdiv_undef_op0_constant_inf(double %x) { define double @fdiv_undef_op1_ninf_constant_inf(double %x) { ; ANY-LABEL: fdiv_undef_op1_ninf_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = fdiv ninf double 0x7FF0000000000000, undef ret double %r @@ -499,7 +475,6 @@ define double @frem_undef_op0_constant_inf(double %x) { define double @frem_undef_op1_fast_constant_inf(double %x) { ; ANY-LABEL: frem_undef_op1_fast_constant_inf: ; ANY: # %bb.0: -; ANY-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; ANY-NEXT: retq %r = frem fast double 0xFFF0000000000000, undef ret double %r From 69da27c7496ea373567ce5121e6fe8613846e7a5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 8 Sep 2020 14:05:20 -0700 Subject: [PATCH 123/161] llvm-symbolizer: Add optional "start file" to match "start line" Since a function might have portions of its code coming from multiple different files, "start line" is ambiguous (it can't just be resolved relative to the file/line specified). Add start file to disambiguate it. 
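To make the new output concrete: with -verbose (and -print-address), llvm-symbolizer now emits a "Function start filename" field next to the existing "Function start line". The sample below is only an approximation of the report layout, with field values mirroring the sym-verbose.test expectations further down in this patch; exact padding may differ.

  0x400590
  foo
    Filename: /tmp/discrim.c
    Function start filename: /tmp/discrim.c
    Function start line: 4
    Line: 5
    Column: 7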
--- llvm/include/llvm/DebugInfo/DIContext.h | 18 +++++++++++------ llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h | 1 + llvm/lib/DebugInfo/DWARF/DWARFContext.cpp | 20 +++++++++++++++---- llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 11 ++++++++++ llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp | 6 ++++-- llvm/test/tools/llvm-dwarfdump/X86/lookup.s | 6 +++--- .../tools/llvm-symbolizer/sym-verbose.test | 12 +++++++++++ 7 files changed, 59 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h index 661d30d04c94e1..ae78fe912188de 100644 --- a/llvm/include/llvm/DebugInfo/DIContext.h +++ b/llvm/include/llvm/DebugInfo/DIContext.h @@ -35,6 +35,7 @@ struct DILineInfo { static constexpr const char *const Addr2LineBadString = "??"; std::string FileName; std::string FunctionName; + std::string StartFileName; Optional Source; uint32_t Line = 0; uint32_t Column = 0; @@ -43,12 +44,15 @@ struct DILineInfo { // DWARF-specific. uint32_t Discriminator = 0; - DILineInfo() : FileName(BadString), FunctionName(BadString) {} + DILineInfo() + : FileName(BadString), FunctionName(BadString), StartFileName(BadString) { + } bool operator==(const DILineInfo &RHS) const { return Line == RHS.Line && Column == RHS.Column && FileName == RHS.FileName && FunctionName == RHS.FunctionName && - StartLine == RHS.StartLine && Discriminator == RHS.Discriminator; + StartFileName == RHS.StartFileName && StartLine == RHS.StartLine && + Discriminator == RHS.Discriminator; } bool operator!=(const DILineInfo &RHS) const { @@ -56,10 +60,10 @@ struct DILineInfo { } bool operator<(const DILineInfo &RHS) const { - return std::tie(FileName, FunctionName, Line, Column, StartLine, - Discriminator) < - std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column, - RHS.StartLine, RHS.Discriminator); + return std::tie(FileName, FunctionName, StartFileName, Line, Column, + StartLine, Discriminator) < + std::tie(RHS.FileName, RHS.FunctionName, RHS.StartFileName, RHS.Line, + RHS.Column, RHS.StartLine, RHS.Discriminator); } explicit operator bool() const { return *this != DILineInfo(); } @@ -72,6 +76,8 @@ struct DILineInfo { OS << "function '" << FunctionName << "', "; OS << "line " << Line << ", "; OS << "column " << Column << ", "; + if (StartFileName != BadString) + OS << "start file '" << StartFileName << "', "; OS << "start line " << StartLine << '\n'; } }; diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index 05a6056e8e21f2..5789421e530440 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -262,6 +262,7 @@ class DWARFDie { /// for this subprogram by resolving DW_AT_sepcification or /// DW_AT_abstract_origin references if necessary. uint64_t getDeclLine() const; + std::string getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column /// from DIE (or zeroes if they are missing). 
This function looks for diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index d31c358798211a..47eba48c279dd8 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -1036,7 +1036,9 @@ DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, uint64_t Address, FunctionNameKind Kind, + DILineInfoSpecifier::FileLineInfoKind FileNameKind, std::string &FunctionName, + std::string &StartFile, uint32_t &StartLine) { // The address may correspond to instruction in some inlined function, // so we have to build the chain of inlined functions and take the @@ -1053,6 +1055,11 @@ static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, FunctionName = Name; FoundResult = true; } + std::string DeclFile = DIE.getDeclFile(FileNameKind); + if (!DeclFile.empty()) { + StartFile = DeclFile; + FoundResult = true; + } if (auto DeclLineResult = DIE.getDeclLine()) { StartLine = DeclLineResult; FoundResult = true; @@ -1224,8 +1231,9 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address, if (!CU) return Result; - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - Result.FunctionName, Result.StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + Result.FunctionName, + Result.StartFileName, Result.StartLine); if (Spec.FLIKind != FileLineInfoKind::None) { if (const DWARFLineTable *LineTable = getLineTableForUnit(CU)) { LineTable->getFileLineInfoForAddress( @@ -1244,15 +1252,17 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( return Lines; uint32_t StartLine = 0; + std::string StartFileName; std::string FunctionName(DILineInfo::BadString); - getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, - FunctionName, StartLine); + getFunctionNameAndStartLineForAddress(CU, Address.Address, Spec.FNKind, Spec.FLIKind, + FunctionName, StartFileName, StartLine); // If the Specifier says we don't need FileLineInfo, just // return the top-most function at the starting address. 
if (Spec.FLIKind == FileLineInfoKind::None) { DILineInfo Result; Result.FunctionName = FunctionName; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Address.Address, Result)); return Lines; @@ -1276,6 +1286,7 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange( Result.FunctionName = FunctionName; Result.Line = Row.Line; Result.Column = Row.Column; + Result.StartFileName = StartFileName; Result.StartLine = StartLine; Lines.push_back(std::make_pair(Row.Address.Address, Result)); } @@ -1318,6 +1329,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address, Frame.FunctionName = Name; if (auto DeclLineResult = FunctionDIE.getDeclLine()) Frame.StartLine = DeclLineResult; + Frame.StartFileName = FunctionDIE.getDeclFile(Spec.FLIKind); if (Spec.FLIKind != FileLineInfoKind::None) { if (i == 0) { // For the topmost frame, initialize the line table of this diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 116f72a1d58baa..31340077a126d7 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -557,6 +557,17 @@ uint64_t DWARFDie::getDeclLine() const { return toUnsigned(findRecursively(DW_AT_decl_line), 0); } +std::string +DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { + std::string FileName; + if (auto DeclFile = toUnsigned(findRecursively(DW_AT_decl_file))) { + if (const auto *LT = U->getContext().getLineTableForUnit(U)) { + LT->getFileNameByIndex(*DeclFile, U->getCompilationDir(), Kind, FileName); + } + } + return FileName; +} + void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn, uint32_t &CallDiscriminator) const { diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 10352237763c9f..01dc31d8496571 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -84,8 +84,10 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) { return; } OS << " Filename: " << Filename << "\n"; - if (Info.StartLine) - OS << "Function start line: " << Info.StartLine << "\n"; + if (Info.StartLine) { + OS << " Function start filename: " << Info.StartFileName << "\n"; + OS << " Function start line: " << Info.StartLine << "\n"; + } OS << " Line: " << Info.Line << "\n"; OS << " Column: " << Info.Column << "\n"; if (Info.Discriminator) diff --git a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s index 74f3314a4f4ec1..fed2271f70a065 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/lookup.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/lookup.s @@ -37,9 +37,9 @@ # LEX: DW_AT_low_pc (0x0000000000000004) # LEX: DW_AT_high_pc (0x0000000000000014) -# A: Line info: file 'foo.c', line 3, column 9, start line 1 -# B: Line info: file 'foo.c', line 4, column 6, start line 1 -# C: Line info: file 'foo.c', line 6, column 1, start line 1 +# A: Line info: file 'foo.c', line 3, column 9, start file 'foo.c', start line 1 +# B: Line info: file 'foo.c', line 4, column 6, start file 'foo.c', start line 1 +# C: Line info: file 'foo.c', line 6, column 1, start file 'foo.c', start line 1 .section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 13 diff --git a/llvm/test/tools/llvm-symbolizer/sym-verbose.test b/llvm/test/tools/llvm-symbolizer/sym-verbose.test index c12eb3b530e1bd..15292903790938 100644 --- a/llvm/test/tools/llvm-symbolizer/sym-verbose.test +++ 
b/llvm/test/tools/llvm-symbolizer/sym-verbose.test @@ -18,11 +18,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x400590 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -30,12 +32,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005a5 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -43,12 +47,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ad #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 0 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -56,11 +62,13 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005b9 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 7 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -69,12 +77,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005ce #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 17 #CHECK-NEXT: Discriminator: 2 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 @@ -83,12 +93,14 @@ RUN: llvm-symbolizer -verbose -print-address -obj=%p/Inputs/discrim < %p/Inputs/ #CHECK: 0x4005d4 #CHECK-NEXT: foo #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 4 #CHECK-NEXT: Line: 5 #CHECK-NEXT: Column: 30 #CHECK-NEXT: Discriminator: 4 #CHECK-NEXT: main #CHECK-NEXT: Filename: /tmp{{[\\/]}}discrim.c +#CHECK-NEXT: Function start filename: /tmp{{[\\/]}}discrim.c #CHECK-NEXT: Function start line: 9 #CHECK-NEXT: Line: 10 #CHECK-NEXT: Column: 0 From 88bf133c99c3124842c182a019306f83f2c1b856 Mon Sep 17 00:00:00 2001 From: Ryan Prichard Date: Thu, 27 Aug 2020 23:46:49 -0700 Subject: [PATCH 124/161] [libunwind] Replace chain-of-ifdefs for 
dl_iterate_phdr Define a _LIBUNWIND_USE_DL_ITERATE_PHDR macro in config.h when there is no other unwind info lookup method. Also define a _LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX macro to factor out (__BIONIC__ and _LIBUNWIND_ARM_EHABI). Differential Revision: https://reviews.llvm.org/D86768 --- libunwind/src/AddressSpace.hpp | 59 +++++++------------ libunwind/src/config.h | 11 ++++ libunwind/test/frameheadercache_test.pass.cpp | 27 ++------- 3 files changed, 35 insertions(+), 62 deletions(-) diff --git a/libunwind/src/AddressSpace.hpp b/libunwind/src/AddressSpace.hpp index e6f2609d679b93..cc298c9bbb8386 100644 --- a/libunwind/src/AddressSpace.hpp +++ b/libunwind/src/AddressSpace.hpp @@ -98,22 +98,15 @@ extern char __eh_frame_hdr_end; extern char __exidx_start; extern char __exidx_end; -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) - -// ELF-based systems may use dl_iterate_phdr() to access sections -// containing unwinding information. The ElfW() macro for pointer-size -// independent ELF header traversal is not provided by on some -// systems (e.g., FreeBSD). On these systems the data structures are -// just called Elf_XXX. Define ElfW() locally. -#ifndef _WIN32 -#include -#else +#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) + #include #include -#endif -#if !defined(ElfW) -#define ElfW(type) Elf_##type -#endif + +#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) || \ + defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) + +#include #endif @@ -351,23 +344,14 @@ LocalAddressSpace::getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, return result; } -#ifdef __APPLE__ -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) -#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) -// Code inside findUnwindSections handles all these cases. -// -// Although the above ifdef chain is ugly, there doesn't seem to be a cleaner -// way to handle it. The generalized boolean expression is: -// -// A OR (B AND C) OR (D AND C) OR (B AND E) OR (F AND E) OR (D AND G) -// -// Running it through various boolean expression simplifiers gives expressions -// that don't help at all. -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) +// The ElfW() macro for pointer-size independent ELF header traversal is not +// provided by on some systems (e.g., FreeBSD). On these systems the +// data structures are just called Elf_XXX. Define ElfW() locally. 
+#if !defined(ElfW) + #define ElfW(type) Elf_##type +#endif #if !defined(Elf_Half) typedef ElfW(Half) Elf_Half; #endif @@ -482,9 +466,7 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, return 0; } -#else // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -// Given all the #ifdef's above, the code here is for -// defined(LIBUNWIND_ARM_EHABI) +#elif defined(_LIBUNWIND_ARM_EHABI) static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, void *data) { @@ -516,8 +498,9 @@ static int findUnwindSectionsByPhdr(struct dl_phdr_info *pinfo, size_t, } return found_obj && found_hdr; } -#endif // defined(LIBUNWIND_SUPPORT_DWARF_UNWIND) -#endif // defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) + +#endif +#endif // defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, @@ -601,16 +584,14 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, (void)targetAddr; (void)info; return true; -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) - // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After - // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. +#elif defined(_LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX) int length = 0; info.arm_section = (uintptr_t)dl_unwind_find_exidx((_Unwind_Ptr)targetAddr, &length); info.arm_section_length = (uintptr_t)length * sizeof(EHABIIndexEntry); if (info.arm_section && info.arm_section_length) return true; -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#elif defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) dl_iterate_cb_data cb_data = {this, &info, targetAddr}; int found = dl_iterate_phdr(findUnwindSectionsByPhdr, &cb_data); return static_cast(found); diff --git a/libunwind/src/config.h b/libunwind/src/config.h index fd177dd7338c15..0885dccda07eba 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -34,7 +34,18 @@ #else #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif +#elif defined(_LIBUNWIND_IS_BAREMETAL) + #if !defined(_LIBUNWIND_ARM_EHABI) + #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 + #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 + #endif +#elif defined(__BIONIC__) && defined(_LIBUNWIND_ARM_EHABI) + // For ARM EHABI, Bionic didn't implement dl_iterate_phdr until API 21. After + // API 21, dl_iterate_phdr exists, but dl_unwind_find_exidx is much faster. + #define _LIBUNWIND_USE_DL_UNWIND_FIND_EXIDX 1 #else + // Assume an ELF system with a dl_iterate_phdr function. + #define _LIBUNWIND_USE_DL_ITERATE_PHDR 1 #if !defined(_LIBUNWIND_ARM_EHABI) #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_INDEX 1 diff --git a/libunwind/test/frameheadercache_test.pass.cpp b/libunwind/test/frameheadercache_test.pass.cpp index ebbc00464e0727..7f2d8e22b9f578 100644 --- a/libunwind/test/frameheadercache_test.pass.cpp +++ b/libunwind/test/frameheadercache_test.pass.cpp @@ -3,27 +3,10 @@ #include "../src/config.h" // Only run this test under supported configurations. -// The frame header cache should work fine for other architectures, -// but the #ifdefs end up being even more complicated than this. -#if defined(__x86_64__) && defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) - -// This #if chain is ugly, but see the comments in AddressSpace.hpp for -// the reasoning. 
- -#ifdef __APPLE__ -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) -int main() { return 0; } -#elif defined(_LIBUNWIND_ARM_EHABI) || defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#if defined(_LIBUNWIND_USE_DL_ITERATE_PHDR) && \ + defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) && \ + defined(_LIBUNWIND_USE_FRAME_HEADER_CACHE) #include #include @@ -84,9 +67,7 @@ int main() { abort(); return 0; } -#else -int main() { return 0; } -#endif + #else int main() { return 0;} #endif From 844e94a5026eea19f1f8091121ad05684f28d047 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 8 Sep 2020 15:48:47 -0700 Subject: [PATCH 125/161] [SelectionDAGBuilder] Remove Unnecessary FastMathFlags temporary. Use SDNodeFlags instead. NFCI This was a missed simplication in D87200 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2d42eb73606639..7bcbb7ccddc8d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8950,16 +8950,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; - FastMathFlags FMF; SDNodeFlags SDFlags; - if (auto *FPMO = dyn_cast(&I)) { - FMF = FPMO->getFastMathFlags(); + if (auto *FPMO = dyn_cast(&I)) SDFlags.copyFMF(*FPMO); - } switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_v2_fadd: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), SDFlags); @@ -8967,7 +8964,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2, SDFlags); break; case Intrinsic::experimental_vector_reduce_v2_fmul: - if (FMF.allowReassoc()) + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), SDFlags); From e7b40c5492e5c4b182df421892136d2ee6868124 Mon Sep 17 00:00:00 2001 From: Sergej Jaskiewicz Date: Wed, 9 Sep 2020 01:53:01 +0300 Subject: [PATCH 126/161] [llvm] [unittest] Allow getting a C string from the TempDir helper class The TempDir.path() member function returns a StringRef. We've been calling the data() method on that StringRef, which does not guarantee to return a null-terminated string (required by chdir and other POSIX functions). Introduce the c_str() method in the TempDir class, which returns the proper string without the need to create a copy of the path at use site. 
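As a rough illustration of the pitfall described above (a sketch only, not part of the patch; `changeDirectory` is an invented helper, while the real call site is the LockFileManager test updated below):

    // Why StringRef::data() is not a safe argument for chdir() and friends:
    // a StringRef is a (pointer, length) view and need not be null-terminated,
    // so POSIX functions that expect a C string may read past the buffer.
    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include <unistd.h>

    void changeDirectory(llvm::SmallString<128> &Path) {
      llvm::StringRef Ref = Path; // a view into Path's storage; no trailing '\0' guaranteed
      (void)Ref;                  // calling chdir(Ref.data()) here could read past the buffer
      (void)chdir(Path.c_str());  // SmallString::c_str() null-terminates in place
    }

The new TempDir::c_str() member plays the same role: it hands callers a properly terminated string without copying the path.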
--- llvm/include/llvm/Testing/Support/SupportHelpers.h | 3 +++ llvm/unittests/Support/LockFileManagerTest.cpp | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Testing/Support/SupportHelpers.h b/llvm/include/llvm/Testing/Support/SupportHelpers.h index 3517361041b940..2419fc95d8178c 100644 --- a/llvm/include/llvm/Testing/Support/SupportHelpers.h +++ b/llvm/include/llvm/Testing/Support/SupportHelpers.h @@ -152,6 +152,9 @@ class TempDir { /// The path to the temporary directory. StringRef path() const { return Path; } + /// The null-terminated C string pointing to the path. + const char *c_str() { return Path.c_str(); } + /// Creates a new path by appending the argument to the path of the managed /// directory using the native path separator. SmallString<128> path(StringRef component) const { diff --git a/llvm/unittests/Support/LockFileManagerTest.cpp b/llvm/unittests/Support/LockFileManagerTest.cpp index 587e442be19660..0b5a0d982a8fcb 100644 --- a/llvm/unittests/Support/LockFileManagerTest.cpp +++ b/llvm/unittests/Support/LockFileManagerTest.cpp @@ -81,7 +81,7 @@ TEST(LockFileManagerTest, RelativePath) { char PathBuf[1024]; const char *OrigPath = getcwd(PathBuf, 1024); - ASSERT_FALSE(chdir(LockFileManagerTestDir.path().data())); + ASSERT_FALSE(chdir(LockFileManagerTestDir.c_str())); TempDir inner("inner"); SmallString<64> LockedFile(inner.path()); From efc17c4bc668ada7d6274879bd5bccdb32436fa2 Mon Sep 17 00:00:00 2001 From: Puyan Lotfi Date: Tue, 8 Sep 2020 19:42:38 -0400 Subject: [PATCH 127/161] [NFC] Fixing a gcc compiler warning. warning: type qualifiers ignored on cast result type [-Wignored-qualifiers] Differential Revision: https://reviews.llvm.org/D86952 --- llvm/include/llvm/CodeGen/StableHashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/StableHashing.h b/llvm/include/llvm/CodeGen/StableHashing.h index c6113aa93c8001..caf27e152e78f6 100644 --- a/llvm/include/llvm/CodeGen/StableHashing.h +++ b/llvm/include/llvm/CodeGen/StableHashing.h @@ -40,7 +40,7 @@ inline void stable_hash_append(stable_hash &Hash, const char Value) { inline void stable_hash_append(stable_hash &Hash, stable_hash Value) { for (unsigned I = 0; I < 8; ++I) { - stable_hash_append(Hash, (const char)Value); + stable_hash_append(Hash, static_cast(Value)); Value >>= 8; } } From be561fad1ebe531232dfb2c90577c612d9e08039 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 8 Sep 2020 16:12:46 -0700 Subject: [PATCH 128/161] Remove unused variable(s) --- llvm/lib/Extensions/Extensions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Extensions/Extensions.cpp b/llvm/lib/Extensions/Extensions.cpp index 2fe537f91876ad..0d25cbda38e004 100644 --- a/llvm/lib/Extensions/Extensions.cpp +++ b/llvm/lib/Extensions/Extensions.cpp @@ -8,7 +8,7 @@ namespace llvm { namespace details { void extensions_anchor() { #define HANDLE_EXTENSION(Ext) \ - static auto Ext = get##Ext##PluginInfo(); + get##Ext##PluginInfo(); #include "llvm/Support/Extension.def" } } From 055d2095898dfbb58b71322c02fbba7e71e8f76a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Sep 2020 14:05:41 -0500 Subject: [PATCH 129/161] Handle masked loads and stores in MemoryLocation/Dependence Differential Revision: https://reviews.llvm.org/D87061 --- .../lib/Analysis/MemoryDependenceAnalysis.cpp | 23 ++++++++++++++++++- llvm/lib/Analysis/MemoryLocation.cpp | 15 ++++++++++++ llvm/test/Transforms/GVN/masked-load-store.ll | 6 +++-- 3 files changed, 41 
insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 2428d57d2809fa..a19c1d78526b23 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -166,6 +166,12 @@ static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. return ModRefInfo::Mod; + case Intrinsic::masked_load: + Loc = MemoryLocation::getForArgument(II, 0, TLI); + return ModRefInfo::Ref; + case Intrinsic::masked_store: + Loc = MemoryLocation::getForArgument(II, 1, TLI); + return ModRefInfo::Mod; default: break; } @@ -442,7 +448,9 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (IntrinsicInst *II = dyn_cast(Inst)) { // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. - if (II->getIntrinsicID() == Intrinsic::lifetime_start) { + Intrinsic::ID ID = II->getIntrinsicID(); + switch (ID) { + case Intrinsic::lifetime_start: // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point (the right approach is to use @@ -450,6 +458,19 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( if (BatchAA.isMustAlias(MemoryLocation(II->getArgOperand(1)), MemLoc)) return MemDepResult::getDef(II); continue; + case Intrinsic::masked_load: + case Intrinsic::masked_store: { + MemoryLocation Loc; + /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI); + AliasResult R = BatchAA.alias(Loc, MemLoc); + if (R == NoAlias) + continue; + if (R == MustAlias) + return MemDepResult::getDef(II); + if (ID == Intrinsic::masked_load) + continue; + return MemDepResult::getClobber(II); + } } } diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 9694036ce4767c..fcea03a118bfc7 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -176,6 +176,21 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call, cast(II->getArgOperand(0))->getZExtValue()), AATags); + case Intrinsic::masked_load: + assert(ArgIdx == 0 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound(DL.getTypeStoreSize(II->getType())), + AATags); + + case Intrinsic::masked_store: + assert(ArgIdx == 1 && "Invalid argument index"); + return MemoryLocation( + Arg, + LocationSize::upperBound( + DL.getTypeStoreSize(II->getArgOperand(0)->getType())), + AATags); + case Intrinsic::invariant_end: // The first argument to an invariant.end is a "descriptor" type (e.g. a // pointer to a empty struct) which is never actually dereferenced. diff --git a/llvm/test/Transforms/GVN/masked-load-store.ll b/llvm/test/Transforms/GVN/masked-load-store.ll index 8119d77bb76e05..0b71a10a067db0 100644 --- a/llvm/test/Transforms/GVN/masked-load-store.ll +++ b/llvm/test/Transforms/GVN/masked-load-store.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -gvn -S < %s | FileCheck %s +; Check that in both cases the second load is recognized as redundant +; and is removed. 
+ define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-LABEL: @f0( ; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] @@ -21,8 +24,7 @@ define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { ; CHECK-NEXT: [[V1:%.*]] = getelementptr <128 x i8>, <128 x i8>* [[A0:%.*]], i32 1 ; CHECK-NEXT: [[V2:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) ; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A2]], <128 x i8>* [[V1]], i32 4, <128 x i1> [[V0]]) -; CHECK-NEXT: [[V3:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) -; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V3]] +; CHECK-NEXT: [[V4:%.*]] = add <128 x i8> [[V2]], [[V2]] ; CHECK-NEXT: ret <128 x i8> [[V4]] ; %v0 = icmp eq <128 x i8> %a1, %a2 From 4013bab9c4a5fe634be6271779a99bc158c3e396 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Tue, 8 Sep 2020 16:42:16 -0700 Subject: [PATCH 130/161] [NFC][ThinLTO] EmbedBitcodeSection doesn't need the Config Instead, passing in the command line options, initialized to nullptr. In an upcoming patch, we can then use the parameter to pass actual command line options. Differential Revision: https://reviews.llvm.org/D87336 --- llvm/lib/LTO/LTOBackend.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index ca29548a4d7ca5..65d8669604950e 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -350,7 +350,7 @@ static cl::opt EmbedBitcode( "lto-embed-bitcode", cl::init(false), cl::desc("Embed LLVM bitcode in object files produced by LTO")); -static void EmitBitcodeSection(Module &M, const Config &Conf) { +static void EmitBitcodeSection(Module &M) { if (!EmbedBitcode) return; SmallVector Buffer; @@ -369,7 +369,7 @@ void codegen(const Config &Conf, TargetMachine *TM, AddStreamFn AddStream, if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; - EmitBitcodeSection(Mod, Conf); + EmitBitcodeSection(Mod); std::unique_ptr DwoOut; SmallString<1024> DwoFile(Conf.SplitDwarfOutput); From 4682f654031c346106463d37ac44e44b0c9856dc Mon Sep 17 00:00:00 2001 From: Xing GUO Date: Wed, 9 Sep 2020 08:48:04 +0800 Subject: [PATCH 131/161] [obj2yaml][test] Test generating and dumping a broken debug_ranges section. This patch tests generating and dumping a broken debug_ranges section. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D87275 --- .../ObjectYAML/MachO/DWARF-debug_ranges.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml index 30997ba1144b62..5aea820145cf73 100644 --- a/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml +++ b/llvm/test/ObjectYAML/MachO/DWARF-debug_ranges.yaml @@ -284,3 +284,27 @@ LoadCommands: reserved2: 0x00000000 reserved3: 0x00000000 content: [[CONTENT=]] + +## Test generating and dumping a __debug_ranges section whose size isn't a +## multiple of the address size. This test case is to ensure that when the +## parser fails, the content of the __debug_ranges section will be dumped into +## the 'content' entry and the 'debug_ranges' entry will not exist. 
+ +# RUN: yaml2obj --docnum=2 -DSIZE=3 -DCONTENT='010203' %s | obj2yaml | FileCheck %s --check-prefix=FAILS + +# FAILS-NOT: DWARF: +# FAILS: Sections: +# FAILS-NEXT: - sectname: __debug_ranges +# FAILS-NEXT: segname: __DWARF +# FAILS-NEXT: addr: 0x0000000000000000 +# FAILS-NEXT: size: 3 +# FAILS-NEXT: offset: 0x00000210 +# FAILS-NEXT: align: 0 +# FAILS-NEXT: reloff: 0x00000000 +# FAILS-NEXT: nreloc: 0 +# FAILS-NEXT: flags: 0x00000000 +# FAILS-NEXT: reserved1: 0x00000000 +# FAILS-NEXT: reserved2: 0x00000000 +# FAILS-NEXT: reserved3: 0x00000000 +# FAILS-NEXT: content: '010203' +# FAILS-NEXT: ... From 889cf9bedff1e4516c6caea5a8a214adbdde0102 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 8 Sep 2020 19:27:37 -0500 Subject: [PATCH 132/161] [EarlyCSE] Add testcase for masked loads and stores, NFC --- .../Transforms/EarlyCSE/masked-intrinsics.ll | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll diff --git a/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll new file mode 100644 index 00000000000000..77183ab97a6b0f --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/masked-intrinsics.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -early-cse < %s | FileCheck %s + +define <128 x i8> @f0(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f0( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[A1]], <128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %a1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + ret <128 x i8> %v1 +} + +define <128 x i8> @f1(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> [[V1]], <128 x i8>* [[A0]], i32 4, <128 x i1> [[V0]]) +; CHECK-NEXT: ret <128 x i8> [[V1]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + call void @llvm.masked.store.v128i8.p0v128i8(<128 x i8> %v1, <128 x i8>* %a0, i32 4, <128 x i1> %v0) + ret <128 x i8> %v1 +} + +define <128 x i8> @f2(<128 x i8>* %a0, <128 x i8> %a1, <128 x i8> %a2) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: [[V0:%.*]] = icmp eq <128 x i8> [[A1:%.*]], [[A2:%.*]] +; CHECK-NEXT: [[V1:%.*]] = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* [[A0:%.*]], i32 4, <128 x i1> [[V0]], <128 x i8> undef) +; CHECK-NEXT: [[V3:%.*]] = add <128 x i8> [[V1]], [[V1]] +; CHECK-NEXT: ret <128 x i8> [[V3]] +; + %v0 = icmp eq <128 x i8> %a1, %a2 + %v1 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, <128 x i8> undef) + %v2 = call <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>* %a0, i32 4, <128 x i1> %v0, 
<128 x i8> undef) + %v3 = add <128 x i8> %v1, %v2 + ret <128 x i8> %v3 +} + +declare <128 x i8> @llvm.masked.load.v128i8.p0v128i8(<128 x i8>*, i32, <128 x i1>, <128 x i8>) +declare void @llvm.masked.store.v128i8.p0v128i8(<128 x i8>, <128 x i8>*, i32, <128 x i1>) From 88b368a1c47bca536f03041f7464235b94ea98a1 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Tue, 8 Sep 2020 21:21:14 -0400 Subject: [PATCH 133/161] [PowerPC] Set setMaxAtomicSizeInBitsSupported appropriately for 32-bit PowerPC in PPCTargetLowering Reviewed By: nemanjai Differential Revision: https://reviews.llvm.org/D86165 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 + llvm/test/CodeGen/PowerPC/atomics-indexed.ll | 140 ++++-- llvm/test/CodeGen/PowerPC/atomics.ll | 437 ++++++++++++++++--- 3 files changed, 503 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f6b5d2ea987f8c..f542a8018b4f0d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1199,6 +1199,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } + if (!isPPC64) + setMaxAtomicSizeInBitsSupported(32); + setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll index b4790adfd90889..cf7225a5fc2005 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction). 
; This is already checked for in Atomics-64.ll @@ -8,9 +9,25 @@ ; Indexed version of loads define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: load_x_i8_seq_cst -; CHECK: sync -; CHECK: lbzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i8_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 1 +; PPC32-NEXT: sync +; PPC32-NEXT: ori r4, r4, 24464 +; PPC32-NEXT: lbzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i8_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 1 +; PPC64-NEXT: sync +; PPC64-NEXT: ori r4, r4, 24464 +; PPC64-NEXT: lbzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -20,8 +37,23 @@ define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) { ret i8 %val } define i16 @load_x_i16_acquire([100000 x i16]* %mem) { -; CHECK-LABEL: load_x_i16_acquire -; CHECK: lhzx [[VAL:r[0-9]+]] +; PPC32-LABEL: load_x_i16_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lis r4, 2 +; PPC32-NEXT: ori r4, r4, 48928 +; PPC32-NEXT: lhzx r3, r3, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i16_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 2 +; PPC64-NEXT: ori r4, r4, 48928 +; PPC64-NEXT: lhzx r3, r3, r4 +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] ; CHECK-PPC64: bne- [[CR]], .+4 @@ -31,19 +63,39 @@ define i16 @load_x_i16_acquire([100000 x i16]* %mem) { ret i16 %val } define i32 @load_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: load_x_i32_monotonic -; CHECK: lwzx -; CHECK-NOT: sync +; CHECK-LABEL: load_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: lwzx r3, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 %val = load atomic i32, i32* %ptr monotonic, align 4 ret i32 %val } define i64 @load_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: load_x_i64_unordered -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ldx -; CHECK-NOT: sync +; PPC32-LABEL: load_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: ldx r3, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 %val = load atomic i64, i64* %ptr unordered, align 8 ret i64 %val @@ -51,35 +103,69 @@ define i64 @load_x_i64_unordered([100000 x i64]* %mem) { ; Indexed version of stores define void @store_x_i8_seq_cst([100000 x i8]* %mem) { -; CHECK-LABEL: store_x_i8_seq_cst -; CHECK: sync -; CHECK: stbx +; CHECK-LABEL: store_x_i8_seq_cst: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 1 +; CHECK-NEXT: ori r4, r4, 24464 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: sync +; CHECK-NEXT: stbx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000 store atomic i8 42, i8* %ptr 
seq_cst, align 1 ret void } define void @store_x_i16_release([100000 x i16]* %mem) { -; CHECK-LABEL: store_x_i16_release -; CHECK: lwsync -; CHECK: sthx +; CHECK-LABEL: store_x_i16_release: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 2 +; CHECK-NEXT: ori r4, r4, 48928 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: sthx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000 store atomic i16 42, i16* %ptr release, align 2 ret void } define void @store_x_i32_monotonic([100000 x i32]* %mem) { -; CHECK-LABEL: store_x_i32_monotonic -; CHECK-NOT: sync -; CHECK: stwx +; CHECK-LABEL: store_x_i32_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lis r4, 5 +; CHECK-NEXT: ori r4, r4, 32320 +; CHECK-NEXT: li r5, 42 +; CHECK-NEXT: stwx r5, r3, r4 +; CHECK-NEXT: blr %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000 store atomic i32 42, i32* %ptr monotonic, align 4 ret void } define void @store_x_i64_unordered([100000 x i64]* %mem) { -; CHECK-LABEL: store_x_i64_unordered -; CHECK-NOT: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: stdx +; PPC32-LABEL: store_x_i64_unordered: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: addi r3, r3, -896 +; PPC32-NEXT: addis r3, r3, 11 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 0 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_x_i64_unordered: +; PPC64: # %bb.0: +; PPC64-NEXT: lis r4, 10 +; PPC64-NEXT: ori r4, r4, 64640 +; PPC64-NEXT: li r5, 42 +; PPC64-NEXT: stdx r5, r3, r4 +; PPC64-NEXT: blr %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000 store atomic i64 42, i64* %ptr unordered, align 8 ret void diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll index c964218cb60bf6..008cd4c7157c1f 100644 --- a/llvm/test/CodeGen/PowerPC/atomics.ll +++ b/llvm/test/CodeGen/PowerPC/atomics.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32 ; This is already checked for in Atomics-64.ll ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -ppc-asm-full-reg-names | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64 @@ -9,22 +10,35 @@ ; We first check loads, for all sizes from i8 to i64. ; We also vary orderings to check for barriers. 
define i8 @load_i8_unordered(i8* %mem) { -; CHECK-LABEL: load_i8_unordered -; CHECK: lbz -; CHECK-NOT: sync +; CHECK-LABEL: load_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i8, i8* %mem unordered, align 1 ret i8 %val } define i16 @load_i16_monotonic(i16* %mem) { -; CHECK-LABEL: load_i16_monotonic -; CHECK: lhz -; CHECK-NOT: sync +; CHECK-LABEL: load_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: lhz r3, 0(r3) +; CHECK-NEXT: blr %val = load atomic i16, i16* %mem monotonic, align 2 ret i16 %val } define i32 @load_i32_acquire(i32* %mem) { -; CHECK-LABEL: load_i32_acquire -; CHECK: lwz [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i32_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: lwz r3, 0(r3) +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i32_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: lwz r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i32, i32* %mem acquire, align 4 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -33,11 +47,28 @@ define i32 @load_i32_acquire(i32* %mem) { ret i32 %val } define i64 @load_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: load_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: ld [[VAL:r[0-9]+]] +; PPC32-LABEL: load_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 5 +; PPC32-NEXT: bl __atomic_load_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: load_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: sync +; PPC64-NEXT: ld r3, 0(r3) +; PPC64-NEXT: cmpd cr7, r3, r3 +; PPC64-NEXT: bne- cr7, .+4 +; PPC64-NEXT: isync +; PPC64-NEXT: blr %val = load atomic i64, i64* %mem seq_cst, align 8 ; CHECK-PPC32: lwsync ; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]] @@ -48,95 +79,401 @@ define i64 @load_i64_seq_cst(i64* %mem) { ; Stores define void @store_i8_unordered(i8* %mem) { -; CHECK-LABEL: store_i8_unordered -; CHECK-NOT: sync -; CHECK: stb +; CHECK-LABEL: store_i8_unordered: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: stb r4, 0(r3) +; CHECK-NEXT: blr store atomic i8 42, i8* %mem unordered, align 1 ret void } define void @store_i16_monotonic(i16* %mem) { -; CHECK-LABEL: store_i16_monotonic -; CHECK-NOT: sync -; CHECK: sth +; CHECK-LABEL: store_i16_monotonic: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr store atomic i16 42, i16* %mem monotonic, align 2 ret void } define void @store_i32_release(i32* %mem) { -; CHECK-LABEL: store_i32_release -; CHECK: lwsync -; CHECK: stw +; CHECK-LABEL: store_i32_release: +; CHECK: # %bb.0: +; CHECK-NEXT: li r4, 42 +; CHECK-NEXT: lwsync +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr store atomic i32 42, i32* %mem release, align 4 ret void } define void @store_i64_seq_cst(i64* %mem) { -; CHECK-LABEL: store_i64_seq_cst -; CHECK: sync -; PPC32: __sync_ -; PPC64-NOT: __sync_ -; PPC64: std +; PPC32-LABEL: store_i64_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: li r6, 42 +; PPC32-NEXT: li r7, 5 +; PPC32-NEXT: bl __atomic_store_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 
+; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: store_i64_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r4, 42 +; PPC64-NEXT: sync +; PPC64-NEXT: std r4, 0(r3) +; PPC64-NEXT: blr store atomic i64 42, i64* %mem seq_cst, align 8 ret void } ; Atomic CmpXchg define i8 @cas_strong_i8_sc_sc(i8* %mem) { -; CHECK-LABEL: cas_strong_i8_sc_sc -; CHECK: sync +; PPC32-LABEL: cas_strong_i8_sc_sc: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: li r7, 255 +; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r8, 24 +; PPC32-NEXT: slw r5, r5, r3 +; PPC32-NEXT: slw r8, r6, r3 +; PPC32-NEXT: slw r6, r7, r3 +; PPC32-NEXT: and r7, r5, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB8_1: +; PPC32-NEXT: lwarx r9, 0, r4 +; PPC32-NEXT: and r5, r9, r6 +; PPC32-NEXT: cmpw r5, r8 +; PPC32-NEXT: bne cr0, .LBB8_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r6 +; PPC32-NEXT: or r9, r9, r7 +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: bne cr0, .LBB8_1 +; PPC32-NEXT: b .LBB8_4 +; PPC32-NEXT: .LBB8_3: +; PPC32-NEXT: stwcx. r9, 0, r4 +; PPC32-NEXT: .LBB8_4: +; PPC32-NEXT: srw r3, r5, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_strong_i8_sc_sc: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: li r7, 255 +; PPC64-NEXT: rldicr r4, r3, 0, 61 +; PPC64-NEXT: xori r3, r8, 24 +; PPC64-NEXT: slw r5, r5, r3 +; PPC64-NEXT: slw r8, r6, r3 +; PPC64-NEXT: slw r6, r7, r3 +; PPC64-NEXT: and r7, r5, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB8_1: +; PPC64-NEXT: lwarx r9, 0, r4 +; PPC64-NEXT: and r5, r9, r6 +; PPC64-NEXT: cmpw r5, r8 +; PPC64-NEXT: bne cr0, .LBB8_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r6 +; PPC64-NEXT: or r9, r9, r7 +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: bne cr0, .LBB8_1 +; PPC64-NEXT: b .LBB8_4 +; PPC64-NEXT: .LBB8_3: +; PPC64-NEXT: stwcx. r9, 0, r4 +; PPC64-NEXT: .LBB8_4: +; PPC64-NEXT: srw r3, r5, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst -; CHECK: lwsync %loaded = extractvalue { i8, i1} %val, 0 ret i8 %loaded } define i16 @cas_weak_i16_acquire_acquire(i16* %mem) { -; CHECK-LABEL: cas_weak_i16_acquire_acquire -;CHECK-NOT: sync +; PPC32-LABEL: cas_weak_i16_acquire_acquire: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC32-NEXT: li r5, 1 +; PPC32-NEXT: ori r7, r6, 65535 +; PPC32-NEXT: xori r4, r4, 16 +; PPC32-NEXT: slw r8, r5, r4 +; PPC32-NEXT: slw r9, r6, r4 +; PPC32-NEXT: slw r5, r7, r4 +; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29 +; PPC32-NEXT: and r6, r8, r5 +; PPC32-NEXT: and r8, r9, r5 +; PPC32-NEXT: .LBB9_1: +; PPC32-NEXT: lwarx r9, 0, r3 +; PPC32-NEXT: and r7, r9, r5 +; PPC32-NEXT: cmpw r7, r8 +; PPC32-NEXT: bne cr0, .LBB9_3 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: andc r9, r9, r5 +; PPC32-NEXT: or r9, r9, r6 +; PPC32-NEXT: stwcx. r9, 0, r3 +; PPC32-NEXT: bne cr0, .LBB9_1 +; PPC32-NEXT: b .LBB9_4 +; PPC32-NEXT: .LBB9_3: +; PPC32-NEXT: stwcx. 
r9, 0, r3 +; PPC32-NEXT: .LBB9_4: +; PPC32-NEXT: srw r3, r7, r4 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i16_acquire_acquire: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27 +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: ori r7, r6, 65535 +; PPC64-NEXT: xori r4, r4, 16 +; PPC64-NEXT: slw r8, r5, r4 +; PPC64-NEXT: slw r9, r6, r4 +; PPC64-NEXT: slw r5, r7, r4 +; PPC64-NEXT: rldicr r3, r3, 0, 61 +; PPC64-NEXT: and r6, r8, r5 +; PPC64-NEXT: and r8, r9, r5 +; PPC64-NEXT: .LBB9_1: +; PPC64-NEXT: lwarx r9, 0, r3 +; PPC64-NEXT: and r7, r9, r5 +; PPC64-NEXT: cmpw r7, r8 +; PPC64-NEXT: bne cr0, .LBB9_3 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: andc r9, r9, r5 +; PPC64-NEXT: or r9, r9, r6 +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: bne cr0, .LBB9_1 +; PPC64-NEXT: b .LBB9_4 +; PPC64-NEXT: .LBB9_3: +; PPC64-NEXT: stwcx. r9, 0, r3 +; PPC64-NEXT: .LBB9_4: +; PPC64-NEXT: srw r3, r7, r4 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire -; CHECK: lwsync %loaded = extractvalue { i16, i1} %val, 0 ret i16 %loaded } define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) { -; CHECK-LABEL: cas_strong_i32_acqrel_acquire -; CHECK: lwsync +; CHECK-LABEL: cas_strong_i32_acqrel_acquire: +; CHECK: # %bb.0: +; CHECK-NEXT: li r5, 1 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB10_1: +; CHECK-NEXT: lwarx r4, 0, r3 +; CHECK-NEXT: cmpw r6, r4 +; CHECK-NEXT: bne cr0, .LBB10_3 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: stwcx. r5, 0, r3 +; CHECK-NEXT: bne cr0, .LBB10_1 +; CHECK-NEXT: b .LBB10_4 +; CHECK-NEXT: .LBB10_3: +; CHECK-NEXT: stwcx. r4, 0, r3 +; CHECK-NEXT: .LBB10_4: +; CHECK-NEXT: mr r3, r4 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire -; CHECK: lwsync %loaded = extractvalue { i32, i1} %val, 0 ret i32 %loaded } define i64 @cas_weak_i64_release_monotonic(i64* %mem) { -; CHECK-LABEL: cas_weak_i64_release_monotonic -; CHECK: lwsync +; PPC32-LABEL: cas_weak_i64_release_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r4, 0 +; PPC32-NEXT: stw r4, 12(r1) +; PPC32-NEXT: li r5, 0 +; PPC32-NEXT: stw r4, 8(r1) +; PPC32-NEXT: addi r4, r1, 8 +; PPC32-NEXT: li r6, 1 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: li r8, 0 +; PPC32-NEXT: bl __atomic_compare_exchange_8 +; PPC32-NEXT: lwz r4, 12(r1) +; PPC32-NEXT: lwz r3, 8(r1) +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: cas_weak_i64_release_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: li r5, 1 +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB11_1: +; PPC64-NEXT: ldarx r4, 0, r3 +; PPC64-NEXT: cmpd r6, r4 +; PPC64-NEXT: bne cr0, .LBB11_4 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: stdcx. r5, 0, r3 +; PPC64-NEXT: bne cr0, .LBB11_1 +; PPC64-NEXT: # %bb.3: +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB11_4: +; PPC64-NEXT: stdcx. 
r4, 0, r3 +; PPC64-NEXT: mr r3, r4 +; PPC64-NEXT: blr %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic -; CHECK-NOT: [sync ] %loaded = extractvalue { i64, i1} %val, 0 ret i64 %loaded } ; AtomicRMW define i8 @add_i8_monotonic(i8* %mem, i8 %operand) { -; CHECK-LABEL: add_i8_monotonic -; CHECK-NOT: sync +; PPC32-LABEL: add_i8_monotonic: +; PPC32: # %bb.0: +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC32-NEXT: li r6, 255 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: xori r3, r7, 24 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: .LBB12_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: add r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB12_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: blr +; +; PPC64-LABEL: add_i8_monotonic: +; PPC64: # %bb.0: +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 28 +; PPC64-NEXT: li r6, 255 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: xori r3, r7, 24 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: .LBB12_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: add r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB12_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: blr %val = atomicrmw add i8* %mem, i8 %operand monotonic ret i8 %val } define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) { -; CHECK-LABEL: xor_i16_seq_cst -; CHECK: sync +; PPC32-LABEL: xor_i16_seq_cst: +; PPC32: # %bb.0: +; PPC32-NEXT: li r6, 0 +; PPC32-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29 +; PPC32-NEXT: ori r6, r6, 65535 +; PPC32-NEXT: xori r3, r7, 16 +; PPC32-NEXT: slw r4, r4, r3 +; PPC32-NEXT: slw r6, r6, r3 +; PPC32-NEXT: sync +; PPC32-NEXT: .LBB13_1: +; PPC32-NEXT: lwarx r7, 0, r5 +; PPC32-NEXT: xor r8, r4, r7 +; PPC32-NEXT: andc r9, r7, r6 +; PPC32-NEXT: and r8, r8, r6 +; PPC32-NEXT: or r8, r8, r9 +; PPC32-NEXT: stwcx. r8, 0, r5 +; PPC32-NEXT: bne cr0, .LBB13_1 +; PPC32-NEXT: # %bb.2: +; PPC32-NEXT: srw r3, r7, r3 +; PPC32-NEXT: lwsync +; PPC32-NEXT: blr +; +; PPC64-LABEL: xor_i16_seq_cst: +; PPC64: # %bb.0: +; PPC64-NEXT: li r6, 0 +; PPC64-NEXT: rlwinm r7, r3, 3, 27, 27 +; PPC64-NEXT: rldicr r5, r3, 0, 61 +; PPC64-NEXT: ori r6, r6, 65535 +; PPC64-NEXT: xori r3, r7, 16 +; PPC64-NEXT: slw r4, r4, r3 +; PPC64-NEXT: slw r6, r6, r3 +; PPC64-NEXT: sync +; PPC64-NEXT: .LBB13_1: +; PPC64-NEXT: lwarx r7, 0, r5 +; PPC64-NEXT: xor r8, r4, r7 +; PPC64-NEXT: andc r9, r7, r6 +; PPC64-NEXT: and r8, r8, r6 +; PPC64-NEXT: or r8, r8, r9 +; PPC64-NEXT: stwcx. r8, 0, r5 +; PPC64-NEXT: bne cr0, .LBB13_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: srw r3, r7, r3 +; PPC64-NEXT: lwsync +; PPC64-NEXT: blr %val = atomicrmw xor i16* %mem, i16 %operand seq_cst -; CHECK: lwsync ret i16 %val } define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) { -; CHECK-LABEL: xchg_i32_acq_rel -; CHECK: lwsync +; CHECK-LABEL: xchg_i32_acq_rel: +; CHECK: # %bb.0: +; CHECK-NEXT: lwsync +; CHECK-NEXT: .LBB14_1: +; CHECK-NEXT: lwarx r5, 0, r3 +; CHECK-NEXT: stwcx. 
r4, 0, r3 +; CHECK-NEXT: bne cr0, .LBB14_1 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: lwsync +; CHECK-NEXT: blr %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel -; CHECK: lwsync ret i32 %val } define i64 @and_i64_release(i64* %mem, i64 %operand) { -; CHECK-LABEL: and_i64_release -; CHECK: lwsync +; PPC32-LABEL: and_i64_release: +; PPC32: # %bb.0: +; PPC32-NEXT: mflr r0 +; PPC32-NEXT: stw r0, 4(r1) +; PPC32-NEXT: stwu r1, -16(r1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: li r7, 3 +; PPC32-NEXT: bl __atomic_fetch_and_8 +; PPC32-NEXT: lwz r0, 20(r1) +; PPC32-NEXT: addi r1, r1, 16 +; PPC32-NEXT: mtlr r0 +; PPC32-NEXT: blr +; +; PPC64-LABEL: and_i64_release: +; PPC64: # %bb.0: +; PPC64-NEXT: lwsync +; PPC64-NEXT: .LBB15_1: +; PPC64-NEXT: ldarx r5, 0, r3 +; PPC64-NEXT: and r6, r4, r5 +; PPC64-NEXT: stdcx. r6, 0, r3 +; PPC64-NEXT: bne cr0, .LBB15_1 +; PPC64-NEXT: # %bb.2: +; PPC64-NEXT: mr r3, r5 +; PPC64-NEXT: blr %val = atomicrmw and i64* %mem, i64 %operand release -; CHECK-NOT: [sync ] ret i64 %val } From b9d086693b5baebc477793af0d86a447bae01b6f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 8 Sep 2020 18:45:11 -0700 Subject: [PATCH 134/161] [llvm-cov gcov] Compute unmeasured arc counts by Kirchhoff's circuit law For a CFG G=(V,E), Knuth describes that by Kirchoff's circuit law, the minimum number of counters necessary is |E|-(|V|-1). The emitted edges form a spanning tree. libgcov emitted .gcda files leverages this optimization while clang --coverage's doesn't. Propagate counts by Kirchhoff's circuit law so that llvm-cov gcov can correctly print line counts of gcc --coverage emitted files and enable the future improvement of clang --coverage. --- ...rprof-gcov-multiple-bbs-single-line.c.gcov | 2 +- llvm/include/llvm/ProfileData/GCOV.h | 10 +-- llvm/lib/ProfileData/GCOV.cpp | 67 ++++++++++++++----- llvm/test/tools/llvm-cov/gcov-4.7.c | 22 +++--- llvm/test/tools/llvm-cov/gcov-8.c | 32 +++++---- llvm/test/tools/llvm-cov/gcov-9.c | 18 +++-- 6 files changed, 91 insertions(+), 60 deletions(-) diff --git a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov index d1104b7f5bbf2b..4debf8fc1b680d 100644 --- a/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov +++ b/compiler-rt/test/profile/Inputs/instrprof-gcov-multiple-bbs-single-line.c.gcov @@ -3,7 +3,7 @@ // CHECK-NEXT: -: 0:Data:instrprof-gcov-multiple-bbs-single-line.gcda // CHECK-NEXT: -: 0:Runs:1 // CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT:function main called 1 returned 100% blocks executed 80% +// CHECK-NEXT:function main called 1 returned 100% blocks executed 77% // CHECK-NEXT: 1: 1:int main(void) // CHECK-NEXT: -: 2:{ // CHECK-NEXT: -: 3: int var; diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h index 7b9ba4410b654a..f87eab6d3ead2f 100644 --- a/llvm/include/llvm/ProfileData/GCOV.h +++ b/llvm/include/llvm/ProfileData/GCOV.h @@ -212,12 +212,13 @@ class GCOVFile { }; struct GCOVArc { - GCOVArc(GCOVBlock &src, GCOVBlock &dst, bool fallthrough) - : src(src), dst(dst), fallthrough(fallthrough) {} + GCOVArc(GCOVBlock &src, GCOVBlock &dst, uint32_t flags) + : src(src), dst(dst), flags(flags) {} + bool onTree() const; GCOVBlock &src; GCOVBlock &dst; - bool fallthrough; + uint32_t flags; uint64_t Count = 0; uint64_t CyclesCount = 0; }; @@ -234,7 +235,7 @@ class GCOVFunction { StringRef getFilename() 
const; size_t getNumBlocks() const { return Blocks.size(); } uint64_t getEntryCount() const; - uint64_t getExitCount() const; + GCOVBlock &getExitBlock() const; BlockIterator block_begin() const { return Blocks.begin(); } BlockIterator block_end() const { return Blocks.end(); } @@ -242,6 +243,7 @@ class GCOVFunction { return make_range(block_begin(), block_end()); } + uint64_t propagateCounts(const GCOVBlock &v, GCOVArc *arc); void print(raw_ostream &OS) const; void dump() const; void collectLineCounts(FileInfo &FI); diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 7b97723da60cc3..0292e2a09d17c1 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -108,11 +108,10 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { for (uint32_t i = 0, e = (length - 1) / 2; i != e; ++i) { uint32_t dstNo = buf.getWord(), flags = buf.getWord(); GCOVBlock *dst = fn->Blocks[dstNo].get(); - auto arc = - std::make_unique(*src, *dst, flags & GCOV_ARC_FALLTHROUGH); + auto arc = std::make_unique(*src, *dst, flags); src->addDstEdge(arc.get()); dst->addSrcEdge(arc.get()); - if (flags & GCOV_ARC_ON_TREE) + if (arc->onTree()) fn->treeArcs.push_back(std::move(arc)); else fn->arcs.push_back(std::move(arc)); @@ -226,6 +225,17 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) { if (arc->dst.succ.empty()) arc->dst.Counter += arc->Count; } + + if (fn->Blocks.size() >= 2) { + GCOVBlock &src = *fn->Blocks[0]; + GCOVBlock &sink = + Version < GCOV::V408 ? *fn->Blocks.back() : *fn->Blocks[1]; + auto arc = std::make_unique(sink, src, GCOV_ARC_ON_TREE); + sink.addDstEdge(arc.get()); + src.addSrcEdge(arc.get()); + fn->treeArcs.push_back(std::move(arc)); + fn->propagateCounts(src, nullptr); + } } pos += 4 * length; if (pos < buf.cursor.tell()) @@ -260,6 +270,8 @@ void GCOVFile::collectLineCounts(FileInfo &fi) { fi.setProgramCount(ProgramCount); } +bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; } + //===----------------------------------------------------------------------===// // GCOVFunction implementation. @@ -271,10 +283,27 @@ uint64_t GCOVFunction::getEntryCount() const { return Blocks.front()->getCount(); } -/// getExitCount - Get the number of times the function returned by retrieving -/// the exit block's count. -uint64_t GCOVFunction::getExitCount() const { - return Blocks.back()->getCount(); +GCOVBlock &GCOVFunction::getExitBlock() const { + return file.getVersion() < GCOV::V408 ? *Blocks.back() : *Blocks[1]; +} + +// For each basic block, the sum of incoming edge counts equals the sum of +// outgoing edge counts by Kirchoff's circuit law. If the unmeasured arcs form a +// spanning tree, the count for each unmeasured arc (GCOV_ARC_ON_TREE) can be +// uniquely identified. +uint64_t GCOVFunction::propagateCounts(const GCOVBlock &v, GCOVArc *pred) { + uint64_t excess = 0; + for (GCOVArc *e : v.srcs()) + if (e != pred) + excess += e->onTree() ? propagateCounts(e->src, e) : e->Count; + for (GCOVArc *e : v.dsts()) + if (e != pred) + excess -= e->onTree() ? 
propagateCounts(e->dst, e) : e->Count; + if (int64_t(excess) < 0) + excess = -excess; + if (pred) + pred->Count = excess; + return excess; } void GCOVFunction::print(raw_ostream &OS) const { @@ -322,8 +351,11 @@ void GCOVBlock::print(raw_ostream &OS) const { } if (!succ.empty()) { OS << "\tDestination Edges : "; - for (const GCOVArc *Edge : succ) + for (const GCOVArc *Edge : succ) { + if (Edge->flags & GCOV_ARC_ON_TREE) + OS << '*'; OS << Edge->dst.Number << " (" << Edge->Count << "), "; + } OS << "\n"; } if (!Lines.empty()) { @@ -441,7 +473,7 @@ uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) { uint64_t Count = 0; for (auto Block : Blocks) { - if (Block->getNumSrcEdges() == 0) { + if (Block->getNumSrcEdges() == 0 || Block->Number == 0) { // The block has no predecessors and a non-null counter // (can be the case with entry block in functions). Count += Block->getCount(); @@ -467,11 +499,13 @@ uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) { //===----------------------------------------------------------------------===// // FileInfo implementation. -// Safe integer division, returns 0 if numerator is 0. -static uint32_t safeDiv(uint64_t Numerator, uint64_t Divisor) { - if (!Numerator) +// Format dividend/divisor as a percentage. Return 1 if the result is greater +// than 0% and less than 1%. +static uint32_t formatPercentage(uint64_t dividend, uint64_t divisor) { + if (!dividend || !divisor) return 0; - return Numerator / Divisor; + dividend *= 100; + return dividend < divisor ? 1 : dividend / divisor; } // This custom division function mimics gcov's branch ouputs: @@ -794,14 +828,15 @@ void FileInfo::printFunctionSummary(raw_ostream &OS, for (const GCOVFunction *Func : Funcs) { uint64_t EntryCount = Func->getEntryCount(); uint32_t BlocksExec = 0; + const GCOVBlock &ExitBlock = Func->getExitBlock(); for (const GCOVBlock &Block : Func->blocks()) - if (Block.getNumDstEdges() && Block.getCount()) + if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount()) ++BlocksExec; OS << "function " << Func->getName() << " called " << EntryCount - << " returned " << safeDiv(Func->getExitCount() * 100, EntryCount) + << " returned " << formatPercentage(ExitBlock.getCount(), EntryCount) << "% blocks executed " - << safeDiv(BlocksExec * 100, Func->getNumBlocks() - 1) << "%\n"; + << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n"; } } diff --git a/llvm/test/tools/llvm-cov/gcov-4.7.c b/llvm/test/tools/llvm-cov/gcov-4.7.c index d92953a6b0b65f..211c635f51283d 100644 --- a/llvm/test/tools/llvm-cov/gcov-4.7.c +++ b/llvm/test/tools/llvm-cov/gcov-4.7.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov [4.7,8) compatible format. 
#include #include -int main() { // GCOV: #####: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: - for (int i = 0; i < 11; i++) // GCOV-NEXT: #####: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: + for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 4: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: - if (result > 400) printf("Overflow!"); // GCOV-NEXT: #####: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 7 + if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-4.7.gc* . -/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-4.7.c | FileCheck %s // CHECK: File 'gcov-4.7.c' -// CHECK-NEXT: Lines executed:55.56% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-4.7.c.gcov' // RUN: FileCheck --input-file=%t/gcov-4.7.c.gcov --check-prefix=HEADER %s diff --git a/llvm/test/tools/llvm-cov/gcov-8.c b/llvm/test/tools/llvm-cov/gcov-8.c index eef3511e93a7c9..996e4cbe71b33d 100644 --- a/llvm/test/tools/llvm-cov/gcov-8.c +++ b/llvm/test/tools/llvm-cov/gcov-8.c @@ -1,29 +1,27 @@ /// Test that llvm-cov supports gcov 8 compatible format. #include #include -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 8 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-8.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-8.c | FileCheck %s --check-prefixes=OUT,OUTFILE // OUT: File 'gcov-8.c' -// OUT-NEXT: Lines executed:77.78% of 9 +// OUT-NEXT: Lines executed:100.00% of 9 // OUT-B-NEXT: Branches executed:85.71% of 14 -// OUT-B-NEXT: Taken at least once:42.86% of 14 +// OUT-B-NEXT: Taken at least once:71.43% of 14 // OUT-B-NEXT: No calls // OUTFILE-NEXT: Creating 'gcov-8.c.gcov' // OUT-EMPTY: @@ -51,23 +49,23 @@ int main() { // GCOV: 1: [[@LINE]]:int // I-NEXT:lcount:4,1 // I-NEXT:lcount:6,12 // I-B-NEXT:branch:6,taken -// I-B-NEXT:branch:6,nottaken +// I-B-NEXT:branch:6,taken // I-NEXT:lcount:7,11 // I-B-NEXT:branch:7,taken // I-B-NEXT:branch:7,nottaken -// I-NEXT:lcount:8,7 +// I-NEXT:lcount:8,12 +// I-B-NEXT:branch:8,taken // I-B-NEXT:branch:8,taken -// I-B-NEXT:branch:8,nottaken // I-NEXT:lcount:9,11 // I-NEXT:lcount:10,11 // I-B-NEXT:branch:10,taken // I-B-NEXT:branch:10,nottaken // I-NEXT:lcount:11,11 // I-B-NEXT:branch:11,taken -// I-B-NEXT:branch:11,nottaken +// I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,taken // I-B-NEXT:branch:11,nottaken -// I-NEXT:lcount:12,0 +// I-NEXT:lcount:12,4 // I-B-NEXT:branch:12,notexec // I-B-NEXT:branch:12,notexec -// I-NEXT:lcount:14,0 +// I-NEXT:lcount:14,1 diff --git a/llvm/test/tools/llvm-cov/gcov-9.c b/llvm/test/tools/llvm-cov/gcov-9.c index 335e6c0663dbef..a2e9cf47497363 100644 --- a/llvm/test/tools/llvm-cov/gcov-9.c +++ b/llvm/test/tools/llvm-cov/gcov-9.c @@ -1,27 +1,25 @@ /// Test that llvm-cov supports gcov 9 compatible format. #include #include -int main() { // GCOV: 1: [[@LINE]]:int main - double a[11], result; // GCOV-NEXT: -: [[@LINE]]: +int main() { // GCOV: 1: [[@LINE]]:int main + double a[11], result; // GCOV-NEXT: -: [[@LINE]]: for (int i = 0; i < 11; i++) // GCOV-NEXT: 12: [[@LINE]]: scanf("%lf", &a[i]); // GCOV-NEXT: 11: [[@LINE]]: - for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 7: [[@LINE]]: + for (int i = 10; i >= 0; i--) { // GCOV-NEXT: 12: [[@LINE]]: result = sqrt(fabs(a[i])) + 5 * pow(a[i], 3); // GCOV-NEXT: 11: [[@LINE]]: printf("\nf(%lf) = "); // GCOV-NEXT: 11: [[@LINE]]: if (result > 400) printf("Overflow!"); // GCOV-NEXT: 11: [[@LINE]]: - else printf("%lf", result); // GCOV-NEXT: #####: [[@LINE]]: - } // GCOV-NEXT: -: [[@LINE]]: - return 0; // GCOV-NEXT: #####: [[@LINE]]: -} // GCOV-NEXT: -: [[@LINE]]: -/// FIXME several lines do not match gcov 9 + else printf("%lf", result); // GCOV-NEXT: 4: [[@LINE]]: + } // GCOV-NEXT: -: [[@LINE]]: + return 0; // GCOV-NEXT: 1: [[@LINE]]: +} // GCOV-NEXT: -: [[@LINE]]: // RUN: rm -rf %t && mkdir %t && cd %t // RUN: cp %s %p/Inputs/gcov-9.gc* . 
-/// FIXME Lines executed:100.00% of 12 // RUN: llvm-cov gcov gcov-9.c | FileCheck %s // CHECK: File 'gcov-9.c' -// CHECK-NEXT: Lines executed:77.78% of 9 +// CHECK-NEXT: Lines executed:100.00% of 9 // CHECK-NEXT: Creating 'gcov-9.c.gcov' // RUN: FileCheck --input-file=%t/gcov-9.c.gcov --check-prefix=HEADER %s From c2b7b9b642b3247061c4850e9c868c903e3b9654 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 8 Sep 2020 22:09:28 -0500 Subject: [PATCH 135/161] [Hexagon] Fix order of operands in V6_vdealb4w --- llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index b656a845b1526c..c9435cd21c2e0b 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -407,7 +407,7 @@ let Predicates = [UseHVX] in { def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(VecI8 (vpackl HVI16:$Vs)), (V6_vdealb HvxVR:$Vs)>; - def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w HvxVR:$Vs, (IMPLICIT_DEF))>; + def: Pat<(VecI8 (vpackl HVI32:$Vs)), (V6_vdealb4w (IMPLICIT_DEF), HvxVR:$Vs)>; def: Pat<(VecI16 (vpackl HVI32:$Vs)), (V6_vdealh HvxVR:$Vs)>; def: Pat<(VecI16 (bswap HVI16:$Vs)), From 1bb1eac6b177739429e78703b265e7546792fd64 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Wed, 8 Jul 2020 19:30:53 +0000 Subject: [PATCH 136/161] [libFuzzer] Add a command-line option for tracing mutation of corpus inputs in the dot graph format. This patch adds a new command-line option -mutation_graph_file=FILE for debugging purposes, which traces how corpus inputs evolve during a fuzzing run. For each new input that is added to the corpus, a new vertex corresponding to the added input, as well as a new edge that connects its base input to itself are written to the given file. Each vertex is labeled with the filename of the input, and each edge is labeled with the mutation sequence that led to the input w.r.t. its base input. The format of the mutation graph file is the dot file format. Once prepended and appended with "graph {" and "}", respectively, the graph becomes a valid dot file and can be visualized. 
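To make the output format concrete, here is a hand-written sketch of a tiny mutation graph once wrapped for Graphviz. The two SHA-1s are the ones checked for in the new test added below; the "InsertByte-" label is only an illustrative mutation sequence. (Strictly speaking, because the emitted edges use "->", the wrapper has to be "digraph { ... }" for dot to accept the file.)

    digraph {
    "7cf184f4c67ad58283ecb19349720b0cae756829"
    "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40"
    "7cf184f4c67ad58283ecb19349720b0cae756829" -> "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" [label="InsertByte-"];
    }

Each quoted SHA-1 is a vertex for an input that added new coverage, and each labeled edge points from the base input to the input derived from it; `dot -Tsvg` (or any other Graphviz front end) can then render the file.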
Differential Revision: https://reviews.llvm.org/D86560 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 2 ++ compiler-rt/lib/fuzzer/FuzzerFlags.def | 5 ++++ compiler-rt/lib/fuzzer/FuzzerIO.cpp | 13 ++++++++ compiler-rt/lib/fuzzer/FuzzerIO.h | 3 ++ compiler-rt/lib/fuzzer/FuzzerLoop.cpp | 33 +++++++++++++++++++++ compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 9 ++++++ compiler-rt/lib/fuzzer/FuzzerMutate.h | 2 ++ compiler-rt/lib/fuzzer/FuzzerOptions.h | 1 + compiler-rt/test/fuzzer/mutation-graph.test | 17 +++++++++++ 9 files changed, 85 insertions(+) create mode 100644 compiler-rt/test/fuzzer/mutation-graph.test diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index caafd1dbb0a7ba..57df1238c398ca 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -755,6 +755,8 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.FeaturesDir = Flags.features_dir; ValidateDirectoryExists(Options.FeaturesDir, Flags.create_missing_dirs); } + if (Flags.mutation_graph_file) + Options.MutationGraphFile = Flags.mutation_graph_file; if (Flags.collect_data_flow) Options.CollectDataFlow = Flags.collect_data_flow; if (Flags.stop_file) diff --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def index fdb8362cef9d4f..c9a787e03833d5 100644 --- a/compiler-rt/lib/fuzzer/FuzzerFlags.def +++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def @@ -88,6 +88,11 @@ FUZZER_FLAG_STRING(features_dir, "internal flag. Used to dump feature sets on di "Every time a new input is added to the corpus, a corresponding file in the features_dir" " is created containing the unique features of that input." " Features are stored in binary format.") +FUZZER_FLAG_STRING(mutation_graph_file, "Saves a graph (in DOT format) to" + " mutation_graph_file. The graph contains a vertex for each input that has" + " unique coverage; directed edges are provided between parents and children" + " where the child has unique coverage, and are recorded with the type of" + " mutation that caused the child.") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") FUZZER_FLAG_INT(use_memmem, 1, "Use hints from intercepting memmem, strstr, etc") diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.cpp b/compiler-rt/lib/fuzzer/FuzzerIO.cpp index c3330c3425d091..54a7219fc0e0fc 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerIO.cpp @@ -77,6 +77,19 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path) { fclose(Out); } +void AppendToFile(const std::string &Data, const std::string &Path) { + AppendToFile(reinterpret_cast(Data.data()), Data.size(), + Path); +} + +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path) { + FILE *Out = fopen(Path.c_str(), "a"); + if (!Out) + return; + fwrite(Data, sizeof(Data[0]), Size, Out); + fclose(Out); +} + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError) { long E = Epoch ? 
*Epoch : 0; diff --git a/compiler-rt/lib/fuzzer/FuzzerIO.h b/compiler-rt/lib/fuzzer/FuzzerIO.h index 6e3a0b470c5f6a..abd25110d07d49 100644 --- a/compiler-rt/lib/fuzzer/FuzzerIO.h +++ b/compiler-rt/lib/fuzzer/FuzzerIO.h @@ -29,6 +29,9 @@ void WriteToFile(const uint8_t *Data, size_t Size, const std::string &Path); void WriteToFile(const std::string &Data, const std::string &Path); void WriteToFile(const Unit &U, const std::string &Path); +void AppendToFile(const uint8_t *Data, size_t Size, const std::string &Path); +void AppendToFile(const std::string &Data, const std::string &Path); + void ReadDirToVectorOfUnits(const char *Path, Vector *V, long *Epoch, size_t MaxSize, bool ExitOnError); diff --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp index f9986dd8eea51c..ce8c2fb7471448 100644 --- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp @@ -463,6 +463,37 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir, DirPlusFile(FeaturesDir, NewFile)); } +static void WriteEdgeToMutationGraphFile(const std::string &MutationGraphFile, + const InputInfo *II, + const InputInfo *BaseII, + const std::string &MS) { + if (MutationGraphFile.empty()) + return; + + std::string Sha1 = Sha1ToString(II->Sha1); + + std::string OutputString; + + // Add a new vertex. + OutputString.append("\""); + OutputString.append(Sha1); + OutputString.append("\"\n"); + + // Add a new edge if there is base input. + if (BaseII) { + std::string BaseSha1 = Sha1ToString(BaseII->Sha1); + OutputString.append("\""); + OutputString.append(BaseSha1); + OutputString.append("\" -> \""); + OutputString.append(Sha1); + OutputString.append("\" [label=\""); + OutputString.append(MS); + OutputString.append("\"];\n"); + } + + AppendToFile(OutputString, MutationGraphFile); +} + bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, InputInfo *II, bool ForceAddToCorpus, bool *FoundUniqFeatures) { @@ -497,6 +528,8 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, TimeOfUnit, UniqFeatureSetTmp, DFT, II); WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1), NewII->UniqFeatureSet); + WriteEdgeToMutationGraphFile(Options.MutationGraphFile, NewII, II, + MD.MutationSequence()); return true; } if (II && FoundUniqFeaturesOfII && diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index df9ada45bb0391..121b450e8b8c56 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -494,6 +494,15 @@ void MutationDispatcher::PrintMutationSequence() { } } +std::string MutationDispatcher::MutationSequence() { + std::string MS; + for (auto M : CurrentMutatorSequence) { + MS += M.Name; + MS += "-"; + } + return MS; +} + size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { return MutateImpl(Data, Size, MaxSize, Mutators); } diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.h b/compiler-rt/lib/fuzzer/FuzzerMutate.h index 6cbce80276248c..3ce3159f6893be 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.h +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.h @@ -26,6 +26,8 @@ class MutationDispatcher { void StartMutationSequence(); /// Print the current sequence of mutations. void PrintMutationSequence(); + /// Return the current sequence of mutations. + std::string MutationSequence(); /// Indicate that the current sequence of mutations was successful. 
void RecordSuccessfulMutationSequence(); /// Mutates data by invoking user-provided mutator. diff --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h index b17a7474d38f05..706e1c64c706ca 100644 --- a/compiler-rt/lib/fuzzer/FuzzerOptions.h +++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h @@ -59,6 +59,7 @@ struct FuzzingOptions { std::string DataFlowTrace; std::string CollectDataFlow; std::string FeaturesDir; + std::string MutationGraphFile; std::string StopFile; bool SaveArtifacts = true; bool PrintNEW = true; // Print a status line when new units are found; diff --git a/compiler-rt/test/fuzzer/mutation-graph.test b/compiler-rt/test/fuzzer/mutation-graph.test new file mode 100644 index 00000000000000..7774a500395e02 --- /dev/null +++ b/compiler-rt/test/fuzzer/mutation-graph.test @@ -0,0 +1,17 @@ +REQUIRES: linux, x86_64 +RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest + +RUN: rm -rf %t-SimpleTestGraph + +RUN: not %run %t-SimpleTest -seed=1 -max_len=3 -mutation_graph_file=%t-SimpleTestGraph 2>&1 | FileCheck %s +CHECK: BINGO + +RUN: cat %t-SimpleTestGraph | FileCheck %s --check-prefix=GRAPH + +# A vertex and edge that correspond to the discovery of "H" +GRAPH: "7cf184f4c67ad58283ecb19349720b0cae756829" +GRAPH: {{.*}} -> "7cf184f4c67ad58283ecb19349720b0cae756829" [label="{{.*}}"]; + +# A vertex and edge that correspond to the discovery of "Hi" +GRAPH: "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" +GRAPH: {{.*}} -> "94dd9e08c129c785f7f256e82fbe0a30e6d1ae40" [label="{{.*}}"]; From 795e4ee9d2db386a45dc12e6ead21f5f3151d05c Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 9 Sep 2020 11:20:59 +0700 Subject: [PATCH 137/161] [NFC] Move functon from IndVarSimplify to SCEV This function can be reused in other places. Differential Revision: https://reviews.llvm.org/D87274 Reviewed By: fhahn, lebedev.ri --- llvm/include/llvm/Analysis/ScalarEvolution.h | 5 +++ llvm/lib/Analysis/ScalarEvolution.cpp | 25 +++++++++++++++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 32 +------------------ 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 81c5fc9325884d..ea841440e18034 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -768,6 +768,11 @@ class ScalarEvolution { return getBackedgeTakenCount(L, ConstantMaximum); } + /// Return a symbolic upper bound for the backedge taken count of the loop. + /// This is more general than getConstantMaxBackedgeTakenCount as it returns + /// an arbitrary expression as opposed to only constants. + const SCEV* computeMaxBackedgeTakenCount(const Loop *L); + /// Return true if the backedge taken count is either the value returned by /// getConstantMaxBackedgeTakenCount or zero. bool isBackedgeTakenCountMaxOrZero(const Loop *L); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 40d89fff045878..11d92bc816e9f8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -12506,3 +12506,28 @@ bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); return false; } + +const SCEV* ScalarEvolution::computeMaxBackedgeTakenCount(const Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + // Form an expression for the maximum exit count possible for this loop. 
We + // merge the max and exact information to approximate a version of + // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. + SmallVector ExitCounts; + for (BasicBlock *ExitingBB : ExitingBlocks) { + const SCEV *ExitCount = getExitCount(L, ExitingBB); + if (isa(ExitCount)) + ExitCount = getExitCount(L, ExitingBB, + ScalarEvolution::ConstantMaximum); + if (!isa(ExitCount)) { + assert(DT.dominates(ExitingBB, L->getLoopLatch()) && + "We should only have known counts for exiting blocks that " + "dominate latch!"); + ExitCounts.push_back(ExitCount); + } + } + if (ExitCounts.empty()) + return getCouldNotCompute(); + return getUMinFromMismatchedTypes(ExitCounts); +} diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 51d12faf712ad8..20b85626dced94 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2329,36 +2329,6 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { return MadeAnyChanges; } -/// Return a symbolic upper bound for the backedge taken count of the loop. -/// This is more general than getConstantMaxBackedgeTakenCount as it returns -/// an arbitrary expression as opposed to only constants. -/// TODO: Move into the ScalarEvolution class. -static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE, - DominatorTree &DT, Loop *L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - - // Form an expression for the maximum exit count possible for this loop. We - // merge the max and exact information to approximate a version of - // getConstantMaxBackedgeTakenCount which isn't restricted to just constants. - SmallVector ExitCounts; - for (BasicBlock *ExitingBB : ExitingBlocks) { - const SCEV *ExitCount = SE.getExitCount(L, ExitingBB); - if (isa(ExitCount)) - ExitCount = SE.getExitCount(L, ExitingBB, - ScalarEvolution::ConstantMaximum); - if (!isa(ExitCount)) { - assert(DT.dominates(ExitingBB, L->getLoopLatch()) && - "We should only have known counts for exiting blocks that " - "dominate latch!"); - ExitCounts.push_back(ExitCount); - } - } - if (ExitCounts.empty()) - return SE.getCouldNotCompute(); - return SE.getUMinFromMismatchedTypes(ExitCounts); -} - bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -2391,7 +2361,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { return false; // Get a symbolic upper bound on the loop backedge taken count. - const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L); + const SCEV *MaxExitCount = SE->computeMaxBackedgeTakenCount(L); if (isa(MaxExitCount)) return false; From c58dfbdc818275dd0e8f34939a95da546c49cdf6 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 8 Sep 2020 21:52:23 -0500 Subject: [PATCH 138/161] [flang][msvc] Avoid range-based for over initializer_list. NFC. Msvc crashes with "INTERNAL COMPILER ERROR" when iterating over an `std::initializer_list` in a constexpr constructor. Explicitly use the iterator instead. This patch is part of the series to [[ http://lists.llvm.org/pipermail/flang-dev/2020-July/000448.html | make flang compilable with MS Visual Studio ]]. 
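As an illustration of the pattern (a hypothetical, self-contained BitFlags class, not code from this patch; assumes a C++17 compiler), the iterator-based form looks like this:

  #include <initializer_list>

  class BitFlags {
  public:
    // Explicit iterators instead of `for (auto x : bits)`; the range-based
    // form is what triggers the MSVC internal compiler error in a constexpr
    // constructor.
    constexpr BitFlags(const std::initializer_list<int> &bits) {
      for (auto it{bits.begin()}; it != bits.end(); ++it) {
        value_ |= (1u << *it);
      }
    }
    constexpr unsigned value() const { return value_; }

  private:
    unsigned value_{0};
  };

  // Compile-time use still works with the iterator-based loop.
  static_assert(BitFlags{1, 3}.value() == 10u, "constexpr evaluation");

The EnumSet change below applies the same substitution to its constexpr constructor.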
Reviewed By: isuruf Differential Revision: https://reviews.llvm.org/D86425 --- flang/include/flang/Common/enum-set.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Common/enum-set.h b/flang/include/flang/Common/enum-set.h index a7bdc757a1c97b..5d2eda57aa8197 100644 --- a/flang/include/flang/Common/enum-set.h +++ b/flang/include/flang/Common/enum-set.h @@ -37,8 +37,8 @@ template class EnumSet { constexpr EnumSet() {} constexpr EnumSet(const std::initializer_list &enums) { - for (auto x : enums) { - set(x); + for (auto it{enums.begin()}; it != enums.end(); ++it) { + set(*it); } } constexpr EnumSet(const EnumSet &) = default; From d5d75f61e5fbeb290944ee5d28d6cd13fd40f223 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 18 Aug 2020 15:27:41 -0500 Subject: [PATCH 139/161] [Attributor] Provide a command line option that limits recursion depth In `MultiSource/Benchmarks/tramp3d-v4/tramp3d-v4.cpp` we initialized attributes until stack frame ~35k caused space to run out. The initial size 1024 is pretty much random. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 14 +++++++-- llvm/lib/Transforms/IPO/Attributor.cpp | 8 +++++ llvm/test/Transforms/Attributor/chain.ll | 31 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/chain.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 75e7ccde4dba75..4268123841b146 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -133,8 +133,10 @@ struct AAIsDead; class Function; -/// Simple enum classes that forces properties to be spelled out explicitly. -/// +/// The value passed to the line option that defines the maximal initialization +/// chain length. +extern unsigned MaxInitializationChainLength; + ///{ enum class ChangeStatus { CHANGED, @@ -1071,6 +1073,9 @@ struct Attributor { Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || FnScope->hasFnAttribute(Attribute::OptimizeNone); + // Avoid too many nested initializations to prevent a stack overflow. + Invalidate |= InitializationChainLength > MaxInitializationChainLength; + // Bootstrap the new attribute with an initial update to propagate // information, e.g., function -> call site. If it is not on a given // Allowed we will not perform updates at all. @@ -1081,7 +1086,9 @@ struct Attributor { { TimeTraceScope TimeScope(AA.getName() + "::initialize"); + ++InitializationChainLength; AA.initialize(*this); + --InitializationChainLength; } // Initialize and update is allowed for code outside of the current function @@ -1615,6 +1622,9 @@ struct Attributor { CLEANUP, } Phase = AttributorPhase::SEEDING; + /// The current initialization chain length. Tracked to avoid stack overflows. + unsigned InitializationChainLength = 0; + /// Functions, blocks, and instructions we delete after manifest is done. 
/// ///{ diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 32420e847129f1..2a15c6f0b818d3 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -73,6 +73,14 @@ static cl::opt MaxFixpointIterations("attributor-max-iterations", cl::Hidden, cl::desc("Maximal number of fixpoint iterations."), cl::init(32)); + +static cl::opt MaxInitializationChainLengthX( + "attributor-max-initialization-chain-length", cl::Hidden, + cl::desc( + "Maximal number of chained initializations (to avoid stack overflows)"), + cl::location(MaxInitializationChainLength), cl::init(1024)); +unsigned llvm::MaxInitializationChainLength; + static cl::opt VerifyMaxFixpointIterations( "attributor-max-iterations-verify", cl::Hidden, cl::desc("Verify that max-iterations is a tight bound for a fixpoint"), diff --git a/llvm/test/Transforms/Attributor/chain.ll b/llvm/test/Transforms/Attributor/chain.ll new file mode 100644 index 00000000000000..0306fe22c0b3c4 --- /dev/null +++ b/llvm/test/Transforms/Attributor/chain.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_1 +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -attributor-max-initialization-chain-length=1024 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_5 + +declare void @foo(i8* dereferenceable(8) %arg) + +define dso_local i32 @bar(i32* %arg) { +; CHECK_1-LABEL: define {{[^@]+}}@bar +; CHECK_1-SAME: (i32* dereferenceable_or_null(8) [[ARG:%.*]]) { +; CHECK_1-NEXT: entry: +; CHECK_1-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8* +; CHECK_1-NEXT: call void @foo(i8* dereferenceable_or_null(8) [[BC1]]) +; CHECK_1-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK_1-NEXT: ret i32 [[LD]] +; +; CHECK_5-LABEL: define {{[^@]+}}@bar +; CHECK_5-SAME: (i32* nonnull dereferenceable(8) [[ARG:%.*]]) { +; CHECK_5-NEXT: entry: +; CHECK_5-NEXT: [[BC1:%.*]] = bitcast i32* [[ARG]] to i8* +; CHECK_5-NEXT: call void @foo(i8* nonnull dereferenceable(8) [[BC1]]) +; CHECK_5-NEXT: [[LD:%.*]] = load i32, i32* [[ARG]], align 4 +; CHECK_5-NEXT: ret i32 [[LD]] +; +entry: + %bc1 = bitcast i32* %arg to i8* + call void @foo(i8* %bc1) + %ld = load i32, i32* %arg + ret i32 %ld +} From 2600c9e2efce1dc4c64870b00a45ae0082c685fc Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 4 Sep 2020 11:41:58 -0500 Subject: [PATCH 140/161] [Attributor] Re-enable a run line in noalias.ll This was disabled as we were looking for a weird CGSCC problem. I think/hope we fixed it as there were a lot of updates recently. I could never reproduce this locally so I'll use the pre-commit phab builds to confirm this suspicion and if they seem to be happy I'll assume this is fixed. 
Reviewed By: sstefan1 Differential Revision: https://reviews.llvm.org/D87266 --- llvm/test/Transforms/Attributor/noalias.ll | 260 ++++++++++----------- 1 file changed, 127 insertions(+), 133 deletions(-) diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index e7e47d42f45664..030089282334cc 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,8 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes ; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM -; TODO: The old pass manager cgscc run is disabled as it causes a crash on windows which is under investigation: http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/23151 -; opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM +; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; TEST 1 - negative. @@ -42,10 +41,10 @@ define i8* @return_noalias(){ } define void @nocapture(i8* %a){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@nocapture -; NOT_CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nocapture +; IS__TUNIT____-SAME: (i8* nocapture nofree readnone [[A:%.*]]) [[ATTR0:#.*]] { +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocapture @@ -145,10 +144,10 @@ declare i8* @baz(...) nounwind uwtable ; Returning global pointer. Should not be noalias. 
define i8** @getter() { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@getter -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@getter +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@getter @@ -160,10 +159,10 @@ define i8** @getter() { ; Returning global pointer. Should not be noalias. define i8** @calle1(){ -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@calle1 -; NOT_CGSCC_NPM-SAME: () [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: ret i8** @G +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@calle1 +; IS__TUNIT____-SAME: () [[ATTR0]] { +; IS__TUNIT____-NEXT: ret i8** @G ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@calle1 @@ -410,6 +409,7 @@ define void @test12_3(){ } define void @test12_4(){ +; ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() { ; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) @@ -422,17 +422,17 @@ define void @test12_4(){ ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; IS________OPM-NEXT: ret void ; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() { -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; NOT_TUNIT_OPM-NEXT: ret void +; IS________NPM-LABEL: define {{[^@]+}}@test12_4() { +; IS________NPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS________NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS________NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS________NPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) +; IS________NPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________NPM-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -470,12 +470,6 @@ define void @test13_use_noalias(){ ; CHECK-NEXT: call void @use_i8_internal(i8* noalias 
nocapture [[C2]]) ; CHECK-NEXT: ret void ; -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() -; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS__CGSCC_OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; IS__CGSCC_OPM-NEXT: call void @use_i8_internal(i8* noalias [[C2]]) -; IS__CGSCC_OPM-NEXT: ret void %m1 = tail call noalias i8* @malloc(i64 4) %c1 = bitcast i8* %m1 to i16* %c2 = bitcast i16* %c1 to i8* @@ -504,11 +498,11 @@ define void @test13_use_alias(){ ; TEST 14 i2p casts define internal i32 @p2i(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@p2i -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { -; NOT_CGSCC_NPM-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 -; NOT_CGSCC_NPM-NEXT: ret i32 [[P2I]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@p2i +; IS__TUNIT____-SAME: (i32* noalias nofree readnone [[ARG:%.*]]) [[ATTR0]] { +; IS__TUNIT____-NEXT: [[P2I:%.*]] = ptrtoint i32* [[ARG]] to i32 +; IS__TUNIT____-NEXT: ret i32 [[P2I]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@p2i @@ -521,14 +515,14 @@ define internal i32 @p2i(i32* %arg) { } define i32 @i2p(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@i2p -; NOT_CGSCC_NPM-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] -; NOT_CGSCC_NPM-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* -; NOT_CGSCC_NPM-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] -; NOT_CGSCC_NPM-NEXT: ret i32 [[CALL]] +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@i2p +; IS__TUNIT____-SAME: (i32* nofree readonly [[ARG:%.*]]) [[ATTR4:#.*]] { +; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @p2i(i32* noalias nofree readnone [[ARG]]) [[ATTR0]] +; IS__TUNIT____-NEXT: [[I2P:%.*]] = inttoptr i32 [[C]] to i8* +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* [[I2P]] to i32* +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @ret(i32* nocapture nofree readonly align 4 [[BC]]) [[ATTR4]] +; IS__TUNIT____-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@i2p @@ -546,11 +540,11 @@ define i32 @i2p(i32* %arg) { ret i32 %call } define internal i32 @ret(i32* %arg) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@ret -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { -; NOT_CGSCC_NPM-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 -; NOT_CGSCC_NPM-NEXT: ret i32 [[L]] +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@ret +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[ARG:%.*]]) [[ATTR5:#.*]] { +; IS__TUNIT____-NEXT: [[L:%.*]] = load i32, i32* [[ARG]], align 4 +; IS__TUNIT____-NEXT: ret i32 [[L]] ; ; 
IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@ret @@ -572,17 +566,17 @@ define internal i32 @ret(i32* %arg) { ; Function Attrs: nounwind optsize define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@strtox -; NOT_CGSCC_NPM-SAME: (i8* [[S:%.*]]) unnamed_addr { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; NOT_CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] -; NOT_CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; NOT_CGSCC_NPM-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; NOT_CGSCC_NPM-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; NOT_CGSCC_NPM-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; NOT_CGSCC_NPM-NEXT: ret double [[CALL1]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@strtox +; IS__TUNIT____-SAME: (i8* [[S:%.*]]) unnamed_addr { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* +; IS__TUNIT____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) +; IS__TUNIT____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) +; IS__TUNIT____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) +; IS__TUNIT____-NEXT: ret double [[CALL1]] ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox ; IS__CGSCC____-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { @@ -642,11 +636,11 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) @alias_of_p = external global i32* define void @make_alias(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@make_alias -; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@make_alias +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]]) [[ATTR7:#.*]] { +; IS__TUNIT____-NEXT: store i32* [[P]], i32** @alias_of_p, align 8 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@make_alias @@ 
-659,11 +653,11 @@ define void @make_alias(i32* %p) { } define void @only_store(i32* %p) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@only_store -; NOT_CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { -; NOT_CGSCC_NPM-NEXT: store i32 0, i32* [[P]], align 4 -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@only_store +; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[P:%.*]]) [[ATTR8:#.*]] { +; IS__TUNIT____-NEXT: store i32 0, i32* [[P]], align 4 +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@only_store @@ -676,17 +670,17 @@ define void @only_store(i32* %p) { } define void @test15_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test15_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; NOT_CGSCC_NPM: if.then: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] -; NOT_CGSCC_NPM: if.end: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test15_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test15_caller @@ -733,23 +727,23 @@ if.end: ; Therefore, only one of the two conditions of if statementes will be fulfilled. 
define internal void @test16_sub(i32* noalias %p, i32 %c1, i32 %c2) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_sub -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; NOT_CGSCC_NPM: if.then: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END]] -; NOT_CGSCC_NPM: if.end: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] -; NOT_CGSCC_NPM: if.then2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[IF_END3]] -; NOT_CGSCC_NPM: if.end3: -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_sub +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C1:%.*]], i32 [[C2:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C1]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; IS__TUNIT____: if.then: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* noalias nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 [[C2]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL1]], label [[IF_THEN2:%.*]], label [[IF_END3:%.*]] +; IS__TUNIT____: if.then2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[IF_END3]] +; IS__TUNIT____: if.end3: +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_sub @@ -790,11 +784,11 @@ if.end3: } define void @test16_caller(i32* %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test16_caller -; NOT_CGSCC_NPM-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16_caller +; IS__TUNIT____-SAME: (i32* nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: tail call void @test16_sub(i32* noalias nofree writeonly [[P]], i32 [[C]], i32 [[C]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test16_caller @@ -826,20 +820,20 @@ define void @test16_caller(i32* %p, i32 %c) { ; } define void @test17_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; 
NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test17_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3:%.*]] -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: br label [[L3]] -; NOT_CGSCC_NPM: l3: -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test17_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3:%.*]] +; IS__TUNIT____: l2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: br label [[L3]] +; IS__TUNIT____: l3: +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test17_caller @@ -884,10 +878,10 @@ l3: ; } define void @noreturn() { -; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@noreturn -; NOT_CGSCC_NPM-SAME: () [[ATTR9:#.*]] { -; NOT_CGSCC_NPM-NEXT: unreachable +; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@noreturn +; IS__TUNIT____-SAME: () [[ATTR9:#.*]] { +; IS__TUNIT____-NEXT: unreachable ; ; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noreturn @@ -899,18 +893,18 @@ define void @noreturn() { } define void @test18_caller(i32* noalias %p, i32 %c) { -; NOT_CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test18_caller -; NOT_CGSCC_NPM-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { -; NOT_CGSCC_NPM-NEXT: entry: -; NOT_CGSCC_NPM-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 -; NOT_CGSCC_NPM-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] -; NOT_CGSCC_NPM: l1: -; NOT_CGSCC_NPM-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: unreachable -; NOT_CGSCC_NPM: l2: -; NOT_CGSCC_NPM-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] -; NOT_CGSCC_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly +; IS__TUNIT____-LABEL: define {{[^@]+}}@test18_caller +; IS__TUNIT____-SAME: (i32* noalias nofree writeonly [[P:%.*]], i32 [[C:%.*]]) [[ATTR7]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[C]], 0 +; IS__TUNIT____-NEXT: br i1 [[TOBOOL]], label [[L1:%.*]], label [[L2:%.*]] +; IS__TUNIT____: l1: +; IS__TUNIT____-NEXT: tail call void @make_alias(i32* nofree writeonly [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: 
unreachable +; IS__TUNIT____: l2: +; IS__TUNIT____-NEXT: tail call void @only_store(i32* nocapture nofree writeonly align 4 [[P]]) [[ATTR7]] +; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test18_caller From c0ab901bddd5cb80c71848a426b7eaa2882b2ef5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 4 Sep 2020 11:14:33 -0500 Subject: [PATCH 141/161] [Attributor] Selectively look at the callee even when there are operand bundles While operand bundles carry unpredictable semantics, we know some of them and can therefore "ignore" them. In this case we allow to look at the declaration of `llvm.assume` when asked for the attributes at a call site. The assume operand bundles we have do not invalidate the declaration attributes. We cannot test this in isolation because the llvm.assume attributes are determined by the parser. However, a follow up patch will provide test coverage. --- llvm/lib/Transforms/IPO/Attributor.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 2a15c6f0b818d3..4fcea9b5355de5 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -325,6 +325,13 @@ const IRPosition SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); + // Helper to determine if operand bundles on a call site are benin or + // potentially problematic. We handle only llvm.assume for now. + auto CanIgnoreOperandBundles = [](const CallBase &CB) { + return (isa(CB) && + cast(CB).getIntrinsicID() == Intrinsic ::assume); + }; + const auto *CB = dyn_cast(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: @@ -339,7 +346,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) if (const Function *Callee = CB->getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; @@ -347,7 +354,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) { + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { if (const Function *Callee = CB->getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); @@ -368,7 +375,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { assert(CB && ArgNo >= 0 && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
- if (!CB->hasOperandBundles()) { + if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { const Function *Callee = CB->getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); From cefd2a2c705877feebd909a8537b89a8d1d575cc Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 5 Sep 2020 13:20:31 -0500 Subject: [PATCH 142/161] [Attributor] Cleanup `IRPosition::getArgNo` usages As we handle callback calls we need to disambiguate the call site argument number from the callee argument number. While always equal in non-callback calls, a callback comes with a partial parameter-argument mapping so there is no implicit correspondence. Here we split `IRPosition::getArgNo()` into two public functions, `getCallSiteArgNo()` and `getCalleeArgNo()`. Usages are adjusted to pick the right one for their purpose. This fixed some problems that would have been exposed as we more aggressively optimize callbacks. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 66 ++++++++++++++----- llvm/lib/Transforms/IPO/Attributor.cpp | 17 ++--- .../Transforms/IPO/AttributorAttributes.cpp | 25 +++---- llvm/test/Transforms/Attributor/callbacks.ll | 19 +++--- 4 files changed, 82 insertions(+), 45 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 4268123841b146..9f021f7dc63e29 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -388,10 +388,11 @@ struct IRPosition { /// Return the value this abstract attribute is associated with. Value &getAssociatedValue() const { - if (getArgNo() < 0 || isa(&getAnchorValue())) + if (getCallSiteArgNo() < 0 || isa(&getAnchorValue())) return getAnchorValue(); assert(isa(&getAnchorValue()) && "Expected a call base!"); - return *cast(&getAnchorValue())->getArgOperand(getArgNo()); + return *cast(&getAnchorValue()) + ->getArgOperand(getCallSiteArgNo()); } /// Return the type this abstract attribute is associated with. @@ -401,19 +402,22 @@ struct IRPosition { return getAssociatedValue().getType(); } - /// Return the argument number of the associated value if it is an argument or - /// call site argument, otherwise a negative value. - int getArgNo() const { - switch (getPositionKind()) { - case IRPosition::IRP_ARGUMENT: - return cast(getAsValuePtr())->getArgNo(); - case IRPosition::IRP_CALL_SITE_ARGUMENT: { - Use &U = *getAsUsePtr(); - return cast(U.getUser())->getArgOperandNo(&U); - } - default: - return -1; - } + /// Return the callee argument number of the associated value if it is an + /// argument or call site argument, otherwise a negative value. In contrast to + /// `getCallSiteArgNo` this method will always return the "argument number" + /// from the perspective of the callee. This may not the same as the call site + /// if this is a callback call. + int getCalleeArgNo() const { + return getArgNo(/* CallbackCalleeArgIfApplicable */ true); + } + + /// Return the call site argument number of the associated value if it is an + /// argument or call site argument, otherwise a negative value. In contrast to + /// `getCalleArgNo` this method will always return the "operand number" from + /// the perspective of the call site. This may not the same as the callee + /// perspective if this is a callback call. 
+ int getCallSiteArgNo() const { + return getArgNo(/* CallbackCalleeArgIfApplicable */ false); } /// Return the index in the attribute list for this position. @@ -430,7 +434,7 @@ struct IRPosition { return AttributeList::ReturnIndex; case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: - return getArgNo() + AttributeList::FirstArgIndex; + return getCallSiteArgNo() + AttributeList::FirstArgIndex; } llvm_unreachable( "There is no attribute index for a floating or invalid position!"); @@ -515,6 +519,17 @@ struct IRPosition { } } + /// Return true if the position is an argument or call site argument. + bool isArgumentPosition() const { + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + case IRPosition::IRP_CALL_SITE_ARGUMENT: + return true; + default: + return false; + } + } + /// Special DenseMap key values. /// ///{ @@ -561,6 +576,25 @@ struct IRPosition { verify(); } + /// Return the callee argument number of the associated value if it is an + /// argument or call site argument. See also `getCalleeArgNo` and + /// `getCallSiteArgNo`. + int getArgNo(bool CallbackCalleeArgIfApplicable) const { + if (CallbackCalleeArgIfApplicable) + if (Argument *Arg = getAssociatedArgument()) + return Arg->getArgNo(); + switch (getPositionKind()) { + case IRPosition::IRP_ARGUMENT: + return cast(getAsValuePtr())->getArgNo(); + case IRPosition::IRP_CALL_SITE_ARGUMENT: { + Use &U = *getAsUsePtr(); + return cast(U.getUser())->getArgOperandNo(&U); + } + default: + return -1; + } + } + /// IRPosition for the use \p U. The position kind \p PK needs to be /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value /// the used value. diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 4fcea9b5355de5..9927bca9955524 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -197,7 +197,7 @@ Argument *IRPosition::getAssociatedArgument() const { // Not an Argument and no argument number means this is not a call site // argument, thus we cannot find a callback argument to return. - int ArgNo = getArgNo(); + int ArgNo = getCallSiteArgNo(); if (ArgNo < 0) return nullptr; @@ -371,17 +371,17 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { - int ArgNo = IRP.getArgNo(); - assert(CB && ArgNo >= 0 && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) { const Function *Callee = CB->getCalledFunction(); - if (Callee && Callee->arg_size() > unsigned(ArgNo)) - IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); - if (Callee) + if (Callee) { + if (Argument *Arg = IRP.getAssociatedArgument()) + IRPositions.emplace_back(IRPosition::argument(*Arg)); IRPositions.emplace_back(IRPosition::function(*Callee)); } + } IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue())); return; } @@ -518,7 +518,7 @@ void IRPosition::verify() { "Expected call base argument operand for a 'call site argument' " "position"); assert(cast(U->getUser())->getArgOperandNo(U) == - unsigned(getArgNo()) && + unsigned(getCallSiteArgNo()) && "Argument number mismatch!"); assert(U->get() == &getAssociatedValue() && "Associated value mismatch!"); return; @@ -2189,7 +2189,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) { raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) { const Value &AV = Pos.getAssociatedValue(); return OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " [" - << Pos.getAnchorValue().getName() << "@" << Pos.getArgNo() << "]}"; + << Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo() + << "]}"; } raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 0fa5ad92c299e1..b7ec899233e41e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -500,7 +500,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA, Optional T; // The argument number which is also the call site argument number. - unsigned ArgNo = QueryingAA.getIRPosition().getArgNo(); + unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo(); auto CallSiteCheck = [&](AbstractCallSite ACS) { const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo); @@ -2495,7 +2495,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { void initialize(Attributor &A) override { // See callsite argument attribute and callee argument attribute. const auto &CB = cast(getAnchorValue()); - if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) + if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias)) indicateOptimisticFixpoint(); Value &Val = getAssociatedValue(); if (isa(Val) && @@ -2510,7 +2510,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { const AAMemoryBehavior &MemBehaviorAA, const CallBase &CB, unsigned OtherArgNo) { // We do not need to worry about aliasing with the underlying IRP. - if (this->getArgNo() == (int)OtherArgNo) + if (this->getCalleeArgNo() == (int)OtherArgNo) return false; // If it is not a pointer or pointer vector we do not alias. @@ -2925,7 +2925,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl { /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { CallBase &CB = cast(getAnchorValue()); - Use &U = CB.getArgOperandUse(getArgNo()); + Use &U = CB.getArgOperandUse(getCallSiteArgNo()); assert(!isa(U.get()) && "Expected undef values to be filtered out!"); UndefValue &UV = *UndefValue::get(U->getType()); @@ -4030,7 +4030,7 @@ struct AANoCaptureImpl : public AANoCapture { return; } - const Function *F = getArgNo() >= 0 ? getAssociatedFunction() : AnchorScope; + const Function *F = isArgumentPosition() ? 
getAssociatedFunction() : AnchorScope; // Check what state the associated function can actually capture. if (F) @@ -4049,7 +4049,7 @@ struct AANoCaptureImpl : public AANoCapture { if (!isAssumedNoCaptureMaybeReturned()) return; - if (getArgNo() >= 0) { + if (isArgumentPosition()) { if (isAssumedNoCapture()) Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture)); else if (ManifestInternal) @@ -4085,7 +4085,7 @@ struct AANoCaptureImpl : public AANoCapture { State.addKnownBits(NOT_CAPTURED_IN_RET); // Check existing "returned" attributes. - int ArgNo = IRP.getArgNo(); + int ArgNo = IRP.getCalleeArgNo(); if (F.doesNotThrow() && ArgNo >= 0) { for (unsigned u = 0, e = F.arg_size(); u < e; ++u) if (F.hasParamAttribute(u, Attribute::Returned)) { @@ -4262,12 +4262,12 @@ struct AACaptureUseTracker final : public CaptureTracker { ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) { const IRPosition &IRP = getIRPosition(); const Value *V = - getArgNo() >= 0 ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); + isArgumentPosition() ? IRP.getAssociatedArgument() : &IRP.getAssociatedValue(); if (!V) return indicatePessimisticFixpoint(); const Function *F = - getArgNo() >= 0 ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); + isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope(); assert(F && "Expected a function!"); const IRPosition &FnPos = IRPosition::function(*F); const auto &IsDeadAA = @@ -4613,7 +4613,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl { auto PredForCallSite = [&](AbstractCallSite ACS) { const IRPosition &ACSArgPos = - IRPosition::callsite_argument(ACS, getArgNo()); + IRPosition::callsite_argument(ACS, getCallSiteArgNo()); // Check if a coresponding argument was found or if it is on not // associated (which can happen for callback calls). if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID) @@ -4894,7 +4894,8 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { ? dyn_cast(SimplifiedAssociatedValue.getValue()) : UndefValue::get(V.getType()); if (C) { - Use &U = cast(&getAnchorValue())->getArgOperandUse(getArgNo()); + Use &U = cast(&getAnchorValue()) + ->getArgOperandUse(getCallSiteArgNo()); // We can replace the AssociatedValue with the constant. if (&V != C && V.getType() == C->getType()) { if (A.changeUseAfterManifest(U, *C)) @@ -5213,7 +5214,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return getAssociatedValue().getType()->getPointerElementType(); Optional Ty; - unsigned ArgNo = getIRPosition().getArgNo(); + unsigned ArgNo = getIRPosition().getCallSiteArgNo(); // Make sure the associated call site argument has the same type at all call // sites and it is an allocation we know is safe to privatize, for now that diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 03ca89fd1b08ae..8fbc526bf46d33 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -115,6 +115,7 @@ declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...) ; we deduce and propagate noalias and others properly. 
define void @t1_caller(i32* noalias %a) { +; ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@t1_caller ; IS__TUNIT_OPM-SAME: (i32* noalias nocapture align 256 [[A:%.*]]) { ; IS__TUNIT_OPM-NEXT: entry: @@ -136,7 +137,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -160,7 +161,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -190,7 +191,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; IS________NPM: Function Attrs: nosync ; IS________NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] { +; IS________NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) [[ATTR0:#.*]] { ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS________NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -236,7 +237,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -260,7 +261,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -337,8 +338,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller @@ -363,8 +364,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 noundef 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: From 849146ba93fe14989ea0b727b055854b23e5c5e5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 4 Sep 2020 11:20:28 -0500 Subject: [PATCH 143/161] [Attributor] Associate the callback callee with a call site argument (if any) If we have a callback, call site arguments were already associated with the callback callee. Now we also associate the function with the callback callee, thus we know ensure that the following holds true (if all return nonnull): `getAssociatedArgument()->getParent() == getAssociatedFunction()` To test this an early exit from `AAMemoryBehaviorCallSiteArgument::initialize`` is included as well. Without the change to getAssociatedFunction() this kind of early exit for declarations would cause callback call site arguments to miss out. 
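For illustration, a minimal sketch of the intended lookup order (mirroring the Attributor.h hunk below; the dyn_cast template argument, dropped by the diff rendering, is assumed to be CallBase):

  Function *getAssociatedFunction() const {
    if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
      // A callback call site argument is associated with the callback
      // callee's formal argument, so prefer that argument's parent over
      // the directly called function.
      if (Argument *Arg = getAssociatedArgument())
        return Arg->getParent();
      return CB->getCalledFunction();
    }
    return getAnchorScope();
  }

With this lookup, getAssociatedArgument()->getParent() == getAssociatedFunction() holds whenever both are non-null, including for callback call site arguments.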
--- llvm/include/llvm/Transforms/IPO/Attributor.h | 8 +++++++- .../Transforms/IPO/AttributorAttributes.cpp | 19 +++++++++++++------ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 9f021f7dc63e29..5c0a90339150fb 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -339,8 +339,14 @@ struct IRPosition { /// Return the associated function, if any. Function *getAssociatedFunction() const { - if (auto *CB = dyn_cast(&getAnchorValue())) + if (auto *CB = dyn_cast(&getAnchorValue())) { + // We reuse the logic that associates callback calles to arguments of a + // call site here to identify the callback callee as the associated + // function. + if (Argument *Arg = getAssociatedArgument()) + return Arg->getParent(); return CB->getCalledFunction(); + } return getAnchorScope(); } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index b7ec899233e41e..97d88895bbfcea 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5936,14 +5936,21 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - if (Argument *Arg = getAssociatedArgument()) { - if (Arg->hasByValAttr()) { - addKnownBits(NO_WRITES); - removeKnownBits(NO_READS); - removeAssumedBits(NO_READS); - } + // If we don't have an associated attribute this is either a variadic call + // or an indirect call, either way, nothing to do here. + Argument *Arg = getAssociatedArgument(); + if (!Arg) { + indicatePessimisticFixpoint(); + return; + } + if (Arg->hasByValAttr()) { + addKnownBits(NO_WRITES); + removeKnownBits(NO_READS); + removeAssumedBits(NO_READS); } AAMemoryBehaviorArgument::initialize(A); + if (getAssociatedFunction()->isDeclaration()) + indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). From 6a9a0bfc3350efc0fc7fabec9a1fef94f4e9cc86 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 8 Sep 2020 23:15:37 -0700 Subject: [PATCH 144/161] [llvm-cov gcov] Simply computation of line counts and exit block counter --- llvm/lib/ProfileData/GCOV.cpp | 45 ++++++++++++++++------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 0292e2a09d17c1..f8c576d305f05e 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -220,10 +220,7 @@ bool GCOVFile::readGCDA(GCOVBuffer &buf) { for (std::unique_ptr &arc : fn->arcs) { if (!buf.readInt64(arc->Count)) return false; - // FIXME Fix counters arc->src.Counter += arc->Count; - if (arc->dst.succ.empty()) - arc->dst.Counter += arc->Count; } if (fn->Blocks.size() >= 2) { @@ -469,31 +466,28 @@ void GCOVBlock::getCyclesCount(const BlockVector &Blocks, uint64_t &Count) { } /// Get the count for the list of blocks which lie on the same line. -uint64_t GCOVBlock::getLineCount(const BlockVector &Blocks) { - uint64_t Count = 0; - - for (auto Block : Blocks) { - if (Block->getNumSrcEdges() == 0 || Block->Number == 0) { - // The block has no predecessors and a non-null counter - // (can be the case with entry block in functions). 
- Count += Block->getCount(); +uint64_t GCOVBlock::getLineCount(const BlockVector &blocks) { + uint64_t count = 0; + for (const GCOVBlock *block : blocks) { + if (block->Number == 0) { + // For nonstandard control flows, arcs into the exit block may be + // duplicately counted (fork) or not be counted (abnormal exit), and thus + // the (exit,entry) counter may be inaccurate. Count the entry block with + // the outgoing arcs. + for (const GCOVArc *arc : block->succ) + count += arc->Count; } else { // Add counts from predecessors that are not on the same line. - for (auto E : Block->srcs()) { - const GCOVBlock *W = &E->src; - if (find(Blocks, W) == Blocks.end()) { - Count += E->Count; - } - } - } - for (auto E : Block->dsts()) { - E->CyclesCount = E->Count; + for (const GCOVArc *arc : block->pred) + if (!llvm::is_contained(blocks, &arc->src)) + count += arc->Count; } + for (GCOVArc *arc : block->succ) + arc->CyclesCount = arc->Count; } - GCOVBlock::getCyclesCount(Blocks, Count); - - return Count; + GCOVBlock::getCyclesCount(blocks, count); + return count; } //===----------------------------------------------------------------------===// @@ -829,12 +823,15 @@ void FileInfo::printFunctionSummary(raw_ostream &OS, uint64_t EntryCount = Func->getEntryCount(); uint32_t BlocksExec = 0; const GCOVBlock &ExitBlock = Func->getExitBlock(); + uint64_t exitCount = 0; + for (const GCOVArc *arc : ExitBlock.pred) + exitCount += arc->Count; for (const GCOVBlock &Block : Func->blocks()) if (Block.Number != 0 && &Block != &ExitBlock && Block.getCount()) ++BlocksExec; OS << "function " << Func->getName() << " called " << EntryCount - << " returned " << formatPercentage(ExitBlock.getCount(), EntryCount) + << " returned " << formatPercentage(exitCount, EntryCount) << "% blocks executed " << formatPercentage(BlocksExec, Func->getNumBlocks() - 2) << "%\n"; } From d445b6dfec13cdf9b9cb01582ec93548ea30ed0e Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 30 Aug 2020 14:14:33 -0500 Subject: [PATCH 145/161] [Attributor] Cleanup `::initialize` of various AAs This commit cleans up the ::initialize method of various AAs in the following ways: - If an associated function is required, give up on declarations. This was discovered as a real problem when lots of llvm.dbg.XXX call sites were assumed `noreturn` until proven otherwise. That does not make any sense and caused huge regressions and missed deductions. - Require more associated declarations for function interface AAs. - Use the IRAttribute::initialize to determine if function interface AAs can be used in IPO, don't replicate the checks (especially isFunctionIPOAmendable) all over the place. Arguably the function declaration check should be moved to some central place to. 
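As a rough sketch, the recurring shape this cleanup applies to the call-site AAs (shown here for AANoUnwindCallSite; the other ::initialize overrides changed below follow the same pattern):

  void initialize(Attributor &A) override {
    AANoUnwindImpl::initialize(A);
    Function *F = getAssociatedFunction();
    // Declarations have no body to analyze; give up immediately instead of
    // keeping an optimistic assumption about them.
    if (!F || F->isDeclaration())
      indicatePessimisticFixpoint();
  }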
--- .../Transforms/IPO/AttributorAttributes.cpp | 62 ++++++++++++------- .../ArgumentPromotion/X86/attributes.ll | 2 +- .../X86/min-legal-vector-width.ll | 34 +++++----- .../ArgumentPromotion/X86/thiscall.ll | 4 +- .../Attributor/ArgumentPromotion/dbg.ll | 4 +- .../Attributor/ArgumentPromotion/profile.ll | 4 +- .../IPConstantProp/multiple_callbacks.ll | 4 +- .../Attributor/IPConstantProp/pthreads.ll | 4 +- llvm/test/Transforms/Attributor/callbacks.ll | 4 +- .../Attributor/dereferenceable-2.ll | 4 +- .../Transforms/Attributor/heap_to_stack.ll | 6 +- llvm/test/Transforms/Attributor/liveness.ll | 24 +++---- llvm/test/Transforms/Attributor/misc.ll | 4 +- llvm/test/Transforms/Attributor/noalias.ll | 38 ++++-------- llvm/test/Transforms/Attributor/nofree.ll | 4 +- llvm/test/Transforms/Attributor/noundef.ll | 4 +- 16 files changed, 106 insertions(+), 100 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 97d88895bbfcea..7bec9705970385 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -736,7 +736,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl { void initialize(Attributor &A) override { AANoUnwindImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -795,7 +795,7 @@ class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState { ReturnedValues.clear(); Function *F = getAssociatedFunction(); - if (!F) { + if (!F || F->isDeclaration()) { indicatePessimisticFixpoint(); return; } @@ -1388,7 +1388,7 @@ struct AANoSyncCallSite final : AANoSyncImpl { void initialize(Attributor &A) override { AANoSyncImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -1453,7 +1453,7 @@ struct AANoFreeCallSite final : AANoFreeImpl { void initialize(Attributor &A) override { AANoFreeImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -1900,7 +1900,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl { void initialize(Attributor &A) override { AANoRecurseImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -2276,7 +2276,7 @@ struct AAWillReturnImpl : public AAWillReturn { AAWillReturn::initialize(A); Function *F = getAnchorScope(); - if (!F || !A.isFunctionIPOAmendable(*F) || mayContainUnboundedCycle(*F, A)) + if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A)) indicatePessimisticFixpoint(); } @@ -2320,9 +2320,9 @@ struct AAWillReturnCallSite final : AAWillReturnImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { - AAWillReturnImpl::initialize(A); + AAWillReturn::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || !A.isFunctionIPOAmendable(*F)) indicatePessimisticFixpoint(); } @@ -2675,6 +2675,14 @@ struct AANoAliasReturned final : AANoAliasImpl { AANoAliasReturned(const IRPosition &IRP, Attributor &A) : AANoAliasImpl(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoAliasImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } + /// See AbstractAttribute::updateImpl(...). 
virtual ChangeStatus updateImpl(Attributor &A) override { @@ -2716,7 +2724,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl { void initialize(Attributor &A) override { AANoAliasImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -3865,8 +3873,16 @@ struct AAAlignFloating : AAAlignImpl { /// Align attribute for function return value. struct AAAlignReturned final : AAReturnedFromReturnedValues { - AAAlignReturned(const IRPosition &IRP, Attributor &A) - : AAReturnedFromReturnedValues(IRP, A) {} + using Base = AAReturnedFromReturnedValues; + AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Base::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) } @@ -3940,7 +3956,7 @@ struct AAAlignCallSiteReturned final void initialize(Attributor &A) override { Base::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -3956,7 +3972,7 @@ struct AANoReturnImpl : public AANoReturn { void initialize(Attributor &A) override { AANoReturn::initialize(A); Function *F = getAssociatedFunction(); - if (!F) + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); } @@ -5750,7 +5766,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior { void initialize(Attributor &A) override { intersectAssumedBits(BEST_STATE); getKnownStateFromValue(getIRPosition(), getState()); - IRAttribute::initialize(A); + AAMemoryBehavior::initialize(A); } /// Return the memory behavior information encoded in the IR for \p IRP. @@ -5981,6 +5997,14 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating { AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A) : AAMemoryBehaviorFloating(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AAMemoryBehaviorImpl::initialize(A); + Function *F = getAssociatedFunction(); + if (!F || F->isDeclaration()) + indicatePessimisticFixpoint(); + } + /// See AbstractAttribute::manifest(...). ChangeStatus manifest(Attributor &A) override { // We do not annotate returned values. @@ -6030,10 +6054,8 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl { void initialize(Attributor &A) override { AAMemoryBehaviorImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F || !A.isFunctionIPOAmendable(*F)) { + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); - return; - } } /// See AbstractAttribute::updateImpl(...). @@ -6310,7 +6332,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation { void initialize(Attributor &A) override { intersectAssumedBits(BEST_STATE); getKnownStateFromValue(A, getIRPosition(), getState()); - IRAttribute::initialize(A); + AAMemoryLocation::initialize(A); } /// Return the memory behavior information encoded in the IR for \p IRP. 
@@ -6773,10 +6795,8 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl { void initialize(Attributor &A) override { AAMemoryLocationImpl::initialize(A); Function *F = getAssociatedFunction(); - if (!F || !A.isFunctionIPOAmendable(*F)) { + if (!F || F->isDeclaration()) indicatePessimisticFixpoint(); - return; - } } /// See AbstractAttribute::updateImpl(...). diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index 421ddc2bdd3967..a50017ac733152 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 50d318198e1493..310abfba58d55c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -44,7 +44,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12:#.*]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -57,7 +57,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12:#.*]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -138,7 +138,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call 
fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -151,7 +151,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -232,7 +232,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -245,7 +245,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -326,7 +326,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -339,7 +339,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -418,7 +418,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* 
[[ARG]], align 2 @@ -431,7 +431,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -508,7 +508,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -521,7 +521,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -600,7 +600,7 @@ define void 
@avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -613,7 +613,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 @@ -694,7 +694,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) [[ATTR12]] ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 @@ -707,7 +707,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 
32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) [[ATTR11]] ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) [[ATTR12]] ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll index 25729fb8933353..29f6a1bf6d3f5f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll @@ -4,8 +4,8 @@ ; we don't do that anymore. It also verifies that the combination of ; globalopt and argpromotion is able to optimize the call safely. ; -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll index 5e40294cdb27bc..64d5adaa75020c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/dbg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal 
-attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll index 3584172b242daa..932f9197e9ce16 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc 
-attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll index ee411ec0c857ec..91bf46ca2148fc 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll index 4d8b20cb1cf3f4..5afeb2071d192a 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor 
-enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 8fbc526bf46d33..26e4ce2679cccd 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll index aa3130e4a3190a..816e5c47ef35ba 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; 
RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 3c34419a960d43..27774c525c4e03 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -428,9 +428,8 @@ define void @test11() { ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test11() { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -739,10 +738,9 @@ define void @test16c(i8 %v, i8** %P) { ; ; IS________NPM-LABEL: define {{[^@]+}}@test16c ; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) { -; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 ; IS________NPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) [[ATTR6]] -; IS________NPM-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index ea36bb5f66e8ca..8919cf66cbb9b1 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -854,22 +854,22 @@ define internal void @middle() { ; NOT_CGSCC_NPM-NEXT: call void @non_dead_b3() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB1:%.*]] ; NOT_CGSCC_NPM: bb1: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR2:#.*]] 
-; NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b4() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b5() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b6() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b7() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB2:%.*]] ; NOT_CGSCC_NPM: bb2: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b8() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b9() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b10() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b11() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB3:%.*]] ; NOT_CGSCC_NPM: bb3: -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR2]] -; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR2]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b12() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b13() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b14() [[ATTR11]] +; NOT_CGSCC_NPM-NEXT: call void @non_dead_b15() [[ATTR11]] ; NOT_CGSCC_NPM-NEXT: br label [[BB4:%.*]] ; NOT_CGSCC_NPM: bb4: ; NOT_CGSCC_NPM-NEXT: call void @non_exact2() diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 3fa65e07a5162e..a5c4556ac04173 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 030089282334cc..a4c05fb4ca29d5 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -566,29 +566,17 @@ define internal i32 @ret(i32* %arg) { ; Function Attrs: nounwind optsize define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { -; IS__TUNIT____-LABEL: define {{[^@]+}}@strtox -; IS__TUNIT____-SAME: (i8* [[S:%.*]]) unnamed_addr { -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; IS__TUNIT____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__TUNIT____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; IS__TUNIT____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; IS__TUNIT____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; IS__TUNIT____-NEXT: ret double [[CALL1]] -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@strtox -; IS__CGSCC____-SAME: (i8* noalias [[S:%.*]]) 
unnamed_addr { -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 -; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; IS__CGSCC____-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10]] -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; IS__CGSCC____-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) -; IS__CGSCC____-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) -; IS__CGSCC____-NEXT: ret double [[CALL1]] +; CHECK-LABEL: define {{[^@]+}}@strtox +; CHECK-SAME: (i8* noalias [[S:%.*]]) unnamed_addr { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) [[ATTR10:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) +; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 noundef 0) +; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 noundef 1, i32 noundef 1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: ret double [[CALL1]] ; entry: %f = alloca %struct._IO_FILE, align 8 diff --git a/llvm/test/Transforms/Attributor/nofree.ll b/llvm/test/Transforms/Attributor/nofree.ll index 6cbaf71a01e399..b459527fe2eda4 100644 --- a/llvm/test/Transforms/Attributor/nofree.ll +++ b/llvm/test/Transforms/Attributor/nofree.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | 
FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM diff --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll index 34142af9ef8cd8..211338eefa0b9f 100644 --- a/llvm/test/Transforms/Attributor/noundef.ll +++ b/llvm/test/Transforms/Attributor/noundef.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM From f9ea4501b861ecc987afb4a71266dcc83ae640ca Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 8 Sep 2020 15:58:58 -0500 Subject: [PATCH 146/161] [Attributor][NFC] Improve check lines in depgraph.ll This adds the check lines with -NEXT so we see any change in the future. 
--- llvm/test/Transforms/Attributor/depgraph.ll | 290 ++++++++++++++------ 1 file changed, 208 insertions(+), 82 deletions(-) diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 791af581b22a0d..d7dc9d42f49b2b 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -51,88 +51,214 @@ define i32* @checkAndAdvance(i32* align 16 %0) { ; Check for graph ; -; GRAPH: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed 
not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> -; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call 
i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree -; GRAPH: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind -; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly -; GRAPH: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live -; GRAPH: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly -; GRAPH: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned -; GRAPH: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull -; GRAPH: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync -; GRAPH: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state 
nosync -; GRAPH: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree -; GRAPH: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree -; GRAPH: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument -; GRAPH: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument -; GRAPH: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> -; GRAPH: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> -; GRAPH: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull -; GRAPH: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state Live[#BB 4/4][#TBEP 0][#KDE 1] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAWillReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-noreturn +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAUndefinedBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state undefined-behavior +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAReturnedValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state returns(#3)[#UC: 1] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed 
not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state range(32) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state set-state(< {full-set} >) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br i1 %3, label %4, label %7' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoReturn] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-return +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoRecurse] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state may-recurse +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: 
[@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAHeapToStack] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state [H2S] Mallocs: 0 +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; 
GRAPH-NEXT: [AAIsDead] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state dereferenceable<4-4> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPrivatizablePtr] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state [no-priv] +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUnwind] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live +; GRAPH-NEXT: updates [AANoUnwind] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nounwind +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly +; GRAPH-NEXT: updates [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with 
state assumed-live +; GRAPH-NEXT: updates [AAMemoryBehavior] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state readonly +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoCapture] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-NEXT: updates [AANoCapture] for CtxI ' %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryBehavior] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state readonly +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state range(1) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueConstantRange] for CtxI <> at position {flt: [@-1]} with state range(32)<[0,1) / [0,1)> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAPotentialValues] for CtxI ' %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state set-state(< {full-set} >) +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-return +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoAlias] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-alias +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoUndef] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-undef-or-poison +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAValueSimplify] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state simplified +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state align<16-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] 
for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' ret i32* %.0' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAWillReturn] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-noreturn +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoRecurse] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-recurse +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoSync] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync +; GRAPH-NEXT: updates [AANoSync] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nosync +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANoFree] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree +; GRAPH-NEXT: updates [AANoFree] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state nofree +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAMemoryLocation] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument +; GRAPH-NEXT: updates [AAMemoryLocation] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance@-1]} with state memory:argument +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAIsDead] for CtxI ' br label %8' at position {flt: [@-1]} with state assumed-live +; GRAPH-EMPTY: +; GRAPH-NEXT: [AAAlign] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16> +; GRAPH-NEXT: updates [AAAlign] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state align<0-16> +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state unknown-dereferenceable +; GRAPH-EMPTY: +; GRAPH-NEXT: [AANonNull] for CtxI ' %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull +; GRAPH-NEXT: updates [AANonNull] for CtxI ' %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance@-1]} with state nonnull +; GRAPH-EMPTY: +; GRAPH-NEXT: [AADereferenceable] for CtxI ' %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state unknown-dereferenceable + ; GRAPH-NOT: update ; From 3ebc7552270e632d16e7900dd6933ed467159289 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 9 Sep 2020 07:32:30 +0100 Subject: [PATCH 147/161] [ARM] Try to rematerialize VCTP instructions We really want to try 
and avoid spilling P0, which can be difficult since there's only one register, so try to rematerialize any VCTP instructions. Differential Revision: https://reviews.llvm.org/D87280 --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 9 ++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 6 +- llvm/lib/Target/ARM/ARMInstrMVE.td | 1 + .../cond-vector-reduce-mve-codegen.ll | 24 ++- .../Thumb2/LowOverheadLoops/remat-vctp.ll | 139 ++++++++++++++++-- 5 files changed, 150 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index dd7b520effa86c..d7d51fdd29ca88 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6134,3 +6134,12 @@ bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault( MachineFunction &MF) const { return Subtarget.isMClass() && MF.getFunction().hasMinSize(); } + +bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const { + // Try hard to rematerialize any VCTPs because if we spill P0, it will block + // the tail predication conversion. This means that the element count + // register has to be live for longer, but that has to be better than + // spill/restore and VPT predication. + return isVCTP(&MI) && !isPredicated(MI); +} diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 53c627c2093433..5bf6e880056def 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -452,6 +452,9 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { MachineInstr *canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) const; + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AAResults *AA) const override; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
@@ -635,8 +638,7 @@ static inline unsigned getTailPredVectorWidth(unsigned Opcode) { return 0; } -static inline -bool isVCTP(MachineInstr *MI) { +static inline bool isVCTP(const MachineInstr *MI) { switch (MI->getOpcode()) { default: break; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 75543093bcbfe6..2287edeef7662f 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5710,6 +5710,7 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; +let isReMaterializable = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 2fa8a4d8ed7eff..459e2c8395997d 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -10,7 +10,6 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: bxeq lr ; CHECK-NEXT: .LBB0_1: @ %vector.ph ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: add.w r12, r3, #3 ; CHECK-NEXT: mov.w lr, #1 ; CHECK-NEXT: bic r12, r12, #3 @@ -21,28 +20,26 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: and r4, r12, #15 -; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r2], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vdup.32 q3, r4 ; CHECK-NEXT: vpt.i32 eq, q3, zr ; CHECK-NEXT: vmovt q1, q2 -; CHECK-NEXT: add.w r12, r12, #4 -; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp eq i32 %N, 0 @@ -101,8 +98,7 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldr.w r12, [sp, #40] +; CHECK-NEXT: ldr.w r12, [sp, #32] ; CHECK-NEXT: cmp.w r12, #0 ; CHECK-NEXT: beq .LBB1_4 ; CHECK-NEXT: @ %bb.1: @ %vector.ph @@ -116,10 +112,9 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: and r5, r4, #15 -; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r3], #16 @@ -127,22 +122,21 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: vdup.32 q4, r5 ; 
CHECK-NEXT: vpt.i32 eq, q4, zr ; CHECK-NEXT: vsubt.i32 q1, q3, q2 -; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vmul.i32 q1, q1, q2 +; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vaddv.u32 r0, q0 -; CHECK-NEXT: b .LBB1_5 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r7, pc} i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll index 9178217a89e92a..6ce2b9f5f1c026 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/remat-vctp.ll @@ -1,21 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m -mattr=+mve.fp %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp %s -o - | FileCheck %s -define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { +define void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5) { ; CHECK-LABEL: remat_vctp: ; CHECK: @ %bb.0: @ %bb -; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: ldrd lr, r12, [sp, #80] +; CHECK-NEXT: ldrd r5, r12, [sp, #80] +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: mov r4, r12 ; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r4, #4 ; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r4, r12, r4 ; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r4, #3 +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: add.w lr, r4, lr, lsr #2 +; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB0_1: @ %bb6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: subs.w r12, r12, #4 -; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 ; CHECK-NEXT: vabs.s32 q5, q4 @@ -24,7 +30,7 @@ define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i3 ; CHECK-NEXT: vadd.i32 q3, q3, q2 ; CHECK-NEXT: vshr.u32 q6, q5, #24 ; CHECK-NEXT: vand q6, q6, q1 -; CHECK-NEXT: vldrw.u32 q7, [lr, q6, uxtw #2] +; CHECK-NEXT: vldrw.u32 q7, [r5, q6, uxtw #2] ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 ; CHECK-NEXT: vqsub.s32 q6, q0, q6 ; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 @@ -35,18 +41,18 @@ define hidden void @remat_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i3 ; CHECK-NEXT: vqshl.s32 q5, q5, #1 ; CHECK-NEXT: vpt.s32 lt, q4, zr ; CHECK-NEXT: vnegt.s32 q5, q5 -; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 ; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vstrwt.32 q4, [r2], #16 ; CHECK-NEXT: vstrwt.32 q3, [r3], #16 -; CHECK-NEXT: bgt .LBB0_1 +; CHECK-NEXT: le lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %bb44 -; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop 
{d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} bb: %i = zext i16 %arg5 to i32 br label %bb6 @@ -97,6 +103,115 @@ bb44: ; preds = %bb6 ret void } +define void @dont_remat_predicated_vctp(i32* %arg, i32* %arg1, i32* %arg2, i32* %arg3, i32* %arg4, i16 zeroext %arg5, i32 %conv.mask) { +; CHECK-LABEL: dont_remat_predicated_vctp: +; CHECK: @ %bb.0: @ %bb +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd r6, r12, [sp, #88] +; CHECK-NEXT: movs r4, #4 +; CHECK-NEXT: cmp.w r12, #4 +; CHECK-NEXT: vmvn.i32 q0, #0x80000000 +; CHECK-NEXT: csel r5, r12, r4, lt +; CHECK-NEXT: vmov.i32 q1, #0x3f +; CHECK-NEXT: sub.w r5, r12, r5 +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: add.w lr, r5, #3 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: add.w lr, r5, lr, lsr #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB1_1: @ %bb6 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 +; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vpst +; CHECK-NEXT: vctpt.32 r4 +; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vabs.s32 q5, q4 +; CHECK-NEXT: vcls.s32 q3, q5 +; CHECK-NEXT: vshl.u32 q5, q5, q3 +; CHECK-NEXT: vadd.i32 q3, q3, q2 +; CHECK-NEXT: vshr.u32 q6, q5, #24 +; CHECK-NEXT: vand q6, q6, q1 +; CHECK-NEXT: vldrw.u32 q7, [r6, q6, uxtw #2] +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q5 +; CHECK-NEXT: vqsub.s32 q6, q0, q6 +; CHECK-NEXT: vqrdmulh.s32 q6, q7, q6 +; CHECK-NEXT: vqshl.s32 q6, q6, #1 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqsub.s32 q5, q0, q5 +; CHECK-NEXT: vqrdmulh.s32 q5, q6, q5 +; CHECK-NEXT: vqshl.s32 q5, q5, #1 +; CHECK-NEXT: vpt.s32 lt, q4, zr +; CHECK-NEXT: vnegt.s32 q5, q5 +; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: vpst +; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 +; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vstrwt.32 q4, [r2], #16 +; CHECK-NEXT: vstrwt.32 q3, [r3], #16 +; CHECK-NEXT: le lr, .LBB1_1 +; CHECK-NEXT: @ %bb.2: @ %bb44 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r4, r5, r6, pc} +bb: + %i = zext i16 %arg5 to i32 + br label %bb6 + +bb6: ; preds = %bb6, %bb + %i7 = phi i32* [ %arg3, %bb ], [ %i38, %bb6 ] + %i8 = phi i32 [ %i, %bb ], [ %i42, %bb6 ] + %i9 = phi i32* [ %arg2, %bb ], [ %i41, %bb6 ] + %i10 = phi i32* [ %arg1, %bb ], [ %i40, %bb6 ] + %i11 = phi i32* [ %arg, %bb ], [ %i39, %bb6 ] + %i12 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 4) + %mask = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %i8) + %pred = and <4 x i1> %i12, %mask + %i13 = bitcast i32* %i11 to <4 x i32>* + %i14 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i13, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i15 = bitcast i32* %i10 to <4 x i32>* + %i16 = tail call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %i15, i32 4, <4 x i1> %pred, <4 x i32> zeroinitializer) + %i17 = icmp slt <4 x i32> %i16, zeroinitializer + %i18 = sub <4 x i32> zeroinitializer, %i16 + %i19 = select <4 x i1> %i17, <4 x i32> %i18, <4 x i32> %i16 + %i20 = tail call <4 x i32> @llvm.arm.mve.vcls.v4i32(<4 x i32> %i19) + %i21 = shl <4 x i32> %i19, %i20 + %i22 = add <4 x i32> %i20, + %i23 = lshr <4 x i32> %i21, + %i24 = and <4 x i32> %i23, + %i25 = tail call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %arg4, <4 x i32> %i24, i32 32, i32 2, i32 0) + %i26 
= tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i21) + %i27 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i26) + %i28 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i25, <4 x i32> %i27) + %i29 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i28, i32 1, i32 0) + %i30 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i21) + %i31 = tail call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> , <4 x i32> %i30) + %i32 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i29, <4 x i32> %i31) + %i33 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %i32, i32 1, i32 0) + %i34 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %i33, <4 x i1> %i17, <4 x i32> %i33) + %i35 = tail call <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32> %i14, <4 x i32> %i34) + %i36 = bitcast i32* %i9 to <4 x i32>* + %i37 = bitcast i32* %i7 to <4 x i32>* + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i35, <4 x i32>* %i36, i32 4, <4 x i1> %pred) + tail call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %i22, <4 x i32>* %i37, i32 4, <4 x i1> %pred) + %i38 = getelementptr inbounds i32, i32* %i7, i32 4 + %i39 = getelementptr inbounds i32, i32* %i11, i32 4 + %i40 = getelementptr inbounds i32, i32* %i10, i32 4 + %i41 = getelementptr inbounds i32, i32* %i9, i32 4 + %i42 = add nsw i32 %i8, -4 + %i43 = icmp sgt i32 %i8, 4 + br i1 %i43, label %bb6, label %bb44 + +bb44: ; preds = %bb6 + ret void +} + +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) declare <4 x i1> @llvm.arm.mve.vctp32(i32) declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>) declare <4 x i32> @llvm.arm.mve.vqrdmulh.v4i32(<4 x i32>, <4 x i32>) From 2a52c3301a5254d4614401b4aa12ab7c841d7340 Mon Sep 17 00:00:00 2001 From: Denis Antrushin Date: Mon, 7 Sep 2020 22:04:07 +0700 Subject: [PATCH 148/161] [Statepoints] Properly handle const base pointer. Current code in InstEmitter assumes all GC pointers are either VRegs or stack slots - hence, taking only one operand. But it is possible to have constant base, in which case it occupies two machine operands. Add a convinience function to StackMaps to get index of next meta argument and use it in InsrEmitter to properly advance to the next statepoint meta operand. Reviewed By: reames Differential Revision: https://reviews.llvm.org/D87252 --- llvm/include/llvm/CodeGen/StackMaps.h | 4 ++++ .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 20 ++++++---------- llvm/lib/CodeGen/StackMaps.cpp | 23 +++++++++++++++++++ llvm/test/CodeGen/X86/statepoint-vreg.ll | 23 +++++++++++++++++++ 4 files changed, 57 insertions(+), 13 deletions(-) diff --git a/llvm/include/llvm/CodeGen/StackMaps.h b/llvm/include/llvm/CodeGen/StackMaps.h index ce4eb85d645251..578bc0e161a64a 100644 --- a/llvm/include/llvm/CodeGen/StackMaps.h +++ b/llvm/include/llvm/CodeGen/StackMaps.h @@ -261,6 +261,10 @@ class StackMaps { StackMaps(AsmPrinter &AP); + /// Get index of next meta operand. + /// Similar to parseOperand, but does not actually parses operand meaning. 
+ static unsigned getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx); + void reset() { CSInfos.clear(); ConstPool.clear(); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index ff84fdd62075cc..e2da367cfe3f6a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -89,18 +89,9 @@ static unsigned getStatepointGCArgStartIdx(MachineInstr *MI) { "STATEPOINT node expected"); unsigned OperIdx = StatepointOpers(MI).getNumDeoptArgsIdx(); unsigned NumDeopts = MI->getOperand(OperIdx).getImm(); - // At this point stack references has not been lowered yet, so they - // take single operand. ++OperIdx; - while (NumDeopts--) { - MachineOperand &MO = MI->getOperand(OperIdx); - if (MO.isImm() && MO.getImm() == StackMaps::ConstantOp) { - ++OperIdx; - assert(MI->getOperand(OperIdx).isImm() && - "Unexpected statepoint operand"); - } - ++OperIdx; - } + while (NumDeopts--) + OperIdx = StackMaps::getNextMetaArgIdx(MI, OperIdx); return OperIdx; } @@ -1002,11 +993,14 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, assert(!HasPhysRegOuts && "STATEPOINT mishandled"); MachineInstr *MI = MIB; unsigned Def = 0; - unsigned Use = getStatepointGCArgStartIdx(MI) + 1; + unsigned Use = getStatepointGCArgStartIdx(MI); + Use = StackMaps::getNextMetaArgIdx(MI, Use); // first derived + assert(Use < MI->getNumOperands()); while (Def < NumDefs) { if (MI->getOperand(Use).isReg()) MI->tieOperands(Def++, Use); - Use += 2; + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next base + Use = StackMaps::getNextMetaArgIdx(MI, Use); // next derived } } diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 113d477ec80a72..806ba1aa982261 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -88,6 +88,29 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { llvm_unreachable("Unsupported stackmap version!"); } +unsigned StackMaps::getNextMetaArgIdx(MachineInstr *MI, unsigned CurIdx) { + assert(CurIdx < MI->getNumOperands() && "Bad meta arg index"); + const auto &MO = MI->getOperand(CurIdx); + if (MO.isImm()) { + switch (MO.getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: + CurIdx += 2; + break; + case StackMaps::IndirectMemRefOp: + CurIdx += 3; + break; + case StackMaps::ConstantOp: + ++CurIdx; + break; + } + } + ++CurIdx; + assert(CurIdx < MI->getNumOperands() && "points past operand list"); + return CurIdx; +} + /// Go up the super-register chain until we hit a valid dwarf register number. 
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { int RegNum = TRI->getDwarfRegNum(Reg, false); diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll index b613a949c273d9..66b984b905364a 100644 --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -47,6 +47,7 @@ entry: call void @consume(i32 addrspace(1)* %rel1) ret i1 %res1 } + ; test pointer variables intermixed with pointer constants define void @test_mixed(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) gc "statepoint-example" { ; CHECK-LABEL: test_mixed: @@ -567,6 +568,28 @@ exceptional_return.right: ret i64 addrspace(1)* %val.relocated3 } +; test ISEL for constant base pointer - must properly tie operands +define void @test_const_base(i32 addrspace(1)* %a) gc "statepoint-example" { +; CHECK-LABEL: test_const_base: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp24: +; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: callq consume +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %token1 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) ["deopt" (i32 0, i32 1, i32 7, i32 addrspace(1)* null, i32 9), "gc-live" (i32 addrspace(1)* null, i32 addrspace(1)* %a)] + %rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token1, i32 0, i32 1) + call void @consume(i32 addrspace(1)* %rel) + ret void +} + declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) From 6a494e117cd99fc5b4c728d9f5a78ae817f93434 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:16:45 +0000 Subject: [PATCH 149/161] [MLIR] Add debug support for ignored patterns The rewrite engine's cost model may determine some patterns to be irrelevant ahead of their application. These patterns were silently ignored previously and now cause a message in `--debug` mode. Differential Revision: https://reviews.llvm.org/D87290 --- mlir/lib/IR/PatternMatch.cpp | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp index a26bc63ed89d0a..d1da8d1d8f263d 100644 --- a/mlir/lib/IR/PatternMatch.cpp +++ b/mlir/lib/IR/PatternMatch.cpp @@ -10,9 +10,12 @@ #include "mlir/IR/BlockAndValueMapping.h" #include "mlir/IR/Operation.h" #include "mlir/IR/Value.h" +#include "llvm/Support/Debug.h" using namespace mlir; +#define DEBUG_TYPE "pattern-match" + PatternBenefit::PatternBenefit(unsigned benefit) : representation(benefit) { assert(representation == benefit && benefit != ImpossibleToMatchSentinel && "This pattern match benefit is too large to represent"); @@ -207,8 +210,14 @@ void PatternApplicator::applyCostModel(CostModel model) { anyOpPatterns.clear(); for (const auto &pat : owningPatternList) { // If the pattern is always impossible to match, just ignore it. 
- if (pat->getBenefit().isImpossibleToMatch()) + if (pat->getBenefit().isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() + << "Ignoring pattern '" << pat->getRootKind() + << "' because it is impossible to match (by pattern benefit)\n"; + }); continue; + } if (Optional opName = pat->getRootKind()) patterns[*opName].push_back(pat.get()); else @@ -223,8 +232,14 @@ void PatternApplicator::applyCostModel(CostModel model) { auto processPatternList = [&](SmallVectorImpl &list) { // Special case for one pattern in the list, which is the most common case. if (list.size() == 1) { - if (model(*list.front()).isImpossibleToMatch()) + if (model(*list.front()).isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() << "Ignoring pattern '" << list.front()->getRootKind() + << "' because it is impossible to match or cannot lead " + "to legal IR (by cost model)\n"; + }); list.clear(); + } return; } @@ -236,8 +251,14 @@ void PatternApplicator::applyCostModel(CostModel model) { // Sort patterns with highest benefit first, and remove those that are // impossible to match. std::stable_sort(list.begin(), list.end(), cmp); - while (!list.empty() && benefits[list.back()].isImpossibleToMatch()) + while (!list.empty() && benefits[list.back()].isImpossibleToMatch()) { + LLVM_DEBUG({ + llvm::dbgs() << "Ignoring pattern '" << list.back()->getRootKind() + << "' because it is impossible to match or cannot lead to " + "legal IR (by cost model)\n"; + }); list.pop_back(); + } }; for (auto &it : patterns) processPatternList(it.second); From 4e4a3feecdb6bd56483b9c6ba9116609c20588aa Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 09:29:51 +0200 Subject: [PATCH 150/161] [lldb][doc] Mention python3-dev instead of python2.7-dev in build docs --- lldb/docs/resources/build.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index c1cb6ec1a9343a..b5c1fb8cb00124 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -71,7 +71,7 @@ commands below. :: > yum install libedit-devel libxml2-devel ncurses-devel python-devel swig - > sudo apt-get install build-essential subversion swig python2.7-dev libedit-dev libncurses5-dev + > sudo apt-get install build-essential subversion swig python3-dev libedit-dev libncurses5-dev > pkg install swig python > pkgin install swig python27 cmake ninja-build > brew install swig cmake ninja From c0e5e3fbfa504c3792023d0db9008b08caa6b6d7 Mon Sep 17 00:00:00 2001 From: Eduardo Caldas Date: Tue, 8 Sep 2020 11:32:02 +0000 Subject: [PATCH 151/161] [Ignore Expressions] Fix performance regression by inlining `Ignore*SingleStep` We also add a `const` versions of `IgnoreExprNodes` Differential Revision: https://reviews.llvm.org/D87278 --- clang/include/clang/AST/IgnoreExpr.h | 118 ++++++++++++++++++++++-- clang/lib/AST/CMakeLists.txt | 1 - clang/lib/AST/IgnoreExpr.cpp | 129 --------------------------- 3 files changed, 109 insertions(+), 139 deletions(-) delete mode 100644 clang/lib/AST/IgnoreExpr.cpp diff --git a/clang/include/clang/AST/IgnoreExpr.h b/clang/include/clang/AST/IgnoreExpr.h index 0aeb547606a2b1..1c2b538e5b6353 100644 --- a/clang/include/clang/AST/IgnoreExpr.h +++ b/clang/include/clang/AST/IgnoreExpr.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_AST_IGNOREEXPR_H #include "clang/AST/Expr.h" +#include "clang/AST/ExprCXX.h" namespace clang { namespace detail { @@ -38,23 +39,122 @@ template Expr *IgnoreExprNodes(Expr *E, FnTys &&... 
Fns) { return E; } -Expr *IgnoreImplicitCastsSingleStep(Expr *E); +template +const Expr *IgnoreExprNodes(const Expr *E, FnTys &&...Fns) { + return const_cast(IgnoreExprNodes(E, std::forward(Fns)...)); +} + +inline Expr *IgnoreImplicitCastsSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E) { + // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in + // addition to what IgnoreImpCasts() skips to account for the current + // behaviour of IgnoreParenImpCasts(). + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +inline Expr *IgnoreCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + return CE->getSubExpr(); + + if (auto *FE = dyn_cast(E)) + return FE->getSubExpr(); + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *NTTP = dyn_cast(E)) + return NTTP->getReplacement(); + + return E; +} + +inline Expr *IgnoreLValueCastsSingleStep(Expr *E) { + // Skip what IgnoreCastsSingleStep skips, except that only + // lvalue-to-rvalue casts are skipped. + if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() != CK_LValueToRValue) + return E; -Expr *IgnoreImplicitCastsExtraSingleStep(Expr *E); + return IgnoreCastsSingleStep(E); +} + +inline Expr *IgnoreBaseCastsSingleStep(Expr *E) { + if (auto *CE = dyn_cast(E)) + if (CE->getCastKind() == CK_DerivedToBase || + CE->getCastKind() == CK_UncheckedDerivedToBase || + CE->getCastKind() == CK_NoOp) + return CE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitSingleStep(Expr *E) { + Expr *SubE = IgnoreImplicitCastsSingleStep(E); + if (SubE != E) + return SubE; + + if (auto *MTE = dyn_cast(E)) + return MTE->getSubExpr(); + + if (auto *BTE = dyn_cast(E)) + return BTE->getSubExpr(); + + return E; +} + +inline Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) { + if (auto *ICE = dyn_cast(E)) + return ICE->getSubExprAsWritten(); -Expr *IgnoreCastsSingleStep(Expr *E); + return IgnoreImplicitSingleStep(E); +} -Expr *IgnoreLValueCastsSingleStep(Expr *E); +inline Expr *IgnoreParensOnlySingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); + return E; +} -Expr *IgnoreBaseCastsSingleStep(Expr *E); +inline Expr *IgnoreParensSingleStep(Expr *E) { + if (auto *PE = dyn_cast(E)) + return PE->getSubExpr(); -Expr *IgnoreImplicitSingleStep(Expr *E); + if (auto *UO = dyn_cast(E)) { + if (UO->getOpcode() == UO_Extension) + return UO->getSubExpr(); + } -Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E); + else if (auto *GSE = dyn_cast(E)) { + if (!GSE->isResultDependent()) + return GSE->getResultExpr(); + } -Expr *IgnoreParensOnlySingleStep(Expr *E); + else if (auto *CE = dyn_cast(E)) { + if (!CE->isConditionDependent()) + return CE->getChosenSubExpr(); + } -Expr *IgnoreParensSingleStep(Expr *E); + return E; +} } // namespace clang diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index dfd26fd97bc6d8..35099fd0dacf83 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -55,7 +55,6 @@ add_clang_library(clangAST ExternalASTMerger.cpp ExternalASTSource.cpp FormatString.cpp - IgnoreExpr.cpp InheritViz.cpp Interp/ByteCodeEmitter.cpp Interp/ByteCodeExprGen.cpp diff --git a/clang/lib/AST/IgnoreExpr.cpp 
b/clang/lib/AST/IgnoreExpr.cpp deleted file mode 100644 index 65aaaeb6a1ed00..00000000000000 --- a/clang/lib/AST/IgnoreExpr.cpp +++ /dev/null @@ -1,129 +0,0 @@ -//===--- IgnoreExpr.cpp - Ignore intermediate Expressions -----------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements common functions to ignore intermediate expression nodes -// -//===----------------------------------------------------------------------===// - -#include "clang/AST/IgnoreExpr.h" -#include "clang/AST/Expr.h" -#include "clang/AST/ExprCXX.h" - -using namespace clang; - -Expr *clang::IgnoreImplicitCastsSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitCastsExtraSingleStep(Expr *E) { - // FIXME: Skip MaterializeTemporaryExpr and SubstNonTypeTemplateParmExpr in - // addition to what IgnoreImpCasts() skips to account for the current - // behaviour of IgnoreParenImpCasts(). - Expr *SubE = IgnoreImplicitCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -Expr *clang::IgnoreCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - return CE->getSubExpr(); - - if (auto *FE = dyn_cast(E)) - return FE->getSubExpr(); - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *NTTP = dyn_cast(E)) - return NTTP->getReplacement(); - - return E; -} - -Expr *clang::IgnoreLValueCastsSingleStep(Expr *E) { - // Skip what IgnoreCastsSingleStep skips, except that only - // lvalue-to-rvalue casts are skipped. 
- if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() != CK_LValueToRValue) - return E; - - return IgnoreCastsSingleStep(E); -} - -Expr *clang::IgnoreBaseCastsSingleStep(Expr *E) { - if (auto *CE = dyn_cast(E)) - if (CE->getCastKind() == CK_DerivedToBase || - CE->getCastKind() == CK_UncheckedDerivedToBase || - CE->getCastKind() == CK_NoOp) - return CE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitSingleStep(Expr *E) { - Expr *SubE = IgnoreImplicitCastsSingleStep(E); - if (SubE != E) - return SubE; - - if (auto *MTE = dyn_cast(E)) - return MTE->getSubExpr(); - - if (auto *BTE = dyn_cast(E)) - return BTE->getSubExpr(); - - return E; -} - -Expr *clang::IgnoreImplicitAsWrittenSingleStep(Expr *E) { - if (auto *ICE = dyn_cast(E)) - return ICE->getSubExprAsWritten(); - - return IgnoreImplicitSingleStep(E); -} - -Expr *clang::IgnoreParensOnlySingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - return E; -} - -Expr *clang::IgnoreParensSingleStep(Expr *E) { - if (auto *PE = dyn_cast(E)) - return PE->getSubExpr(); - - if (auto *UO = dyn_cast(E)) { - if (UO->getOpcode() == UO_Extension) - return UO->getSubExpr(); - } - - else if (auto *GSE = dyn_cast(E)) { - if (!GSE->isResultDependent()) - return GSE->getResultExpr(); - } - - else if (auto *CE = dyn_cast(E)) { - if (!CE->isConditionDependent()) - return CE->getChosenSubExpr(); - } - - return E; -} From fdc8a1aac293084ffb2d7f04b1225c8e2fb3b164 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 9 Sep 2020 07:32:57 +0000 Subject: [PATCH 152/161] [gn build] Port c0e5e3fbfa5 --- llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn index bb3d69d046bef1..4d645799dbf655 100644 --- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn @@ -81,7 +81,6 @@ static_library("AST") { "ExternalASTMerger.cpp", "ExternalASTSource.cpp", "FormatString.cpp", - "IgnoreExpr.cpp", "InheritViz.cpp", "Interp/ByteCodeEmitter.cpp", "Interp/ByteCodeExprGen.cpp", From 133322d2e30877d5039643ab5c2ed02f75c29466 Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:44:38 +0000 Subject: [PATCH 153/161] [MLIR][Standard] Update `tensor_from_elements` assembly format Remove the redundant parenthesis that are used for none of the other operation formats. 
Differential Revision: https://reviews.llvm.org/D86287 --- .../include/mlir/Dialect/StandardOps/IR/Ops.td | 11 +++-------- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 18 +++++++++++++----- .../ShapeToStandard/shape-to-standard.mlir | 6 +++--- mlir/test/IR/core-ops.mlir | 12 ++++++------ mlir/test/IR/invalid-ops.mlir | 4 ++-- mlir/test/Transforms/canonicalize.mlir | 2 +- 6 files changed, 28 insertions(+), 25 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index f326ae55786500..c276818589afe2 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1621,14 +1621,9 @@ def TensorFromElementsOp : Std_Op<"tensor_from_elements", let results = (outs AnyTensor:$result); let skipDefaultBuilders = 1; - let builders = [OpBuilder< - "OpBuilder &builder, OperationState &result, ValueRange elements", [{ - assert(!elements.empty() && "expected at least one element"); - result.addOperands(elements); - result.addTypes( - RankedTensorType::get({static_cast(elements.size())}, - *elements.getTypes().begin())); - }]>]; + let builders = [ + OpBuilder<"OpBuilder &b, OperationState &result, ValueRange elements"> + ]; let hasCanonicalizer = 1; } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 65f8b83d9a7187..1c69019870198c 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1744,9 +1744,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, OperationState &result) { SmallVector elementsOperands; Type resultType; - if (parser.parseLParen() || parser.parseOperandList(elementsOperands) || - parser.parseRParen() || parser.parseOptionalAttrDict(result.attributes) || - parser.parseColon() || parser.parseType(resultType)) + if (parser.parseOperandList(elementsOperands) || + parser.parseOptionalAttrDict(result.attributes) || + parser.parseColonType(resultType)) return failure(); if (parser.resolveOperands(elementsOperands, @@ -1759,9 +1759,9 @@ static ParseResult parseTensorFromElementsOp(OpAsmParser &parser, } static void print(OpAsmPrinter &p, TensorFromElementsOp op) { - p << "tensor_from_elements(" << op.elements() << ')'; + p << "tensor_from_elements " << op.elements(); p.printOptionalAttrDict(op.getAttrs()); - p << " : " << op.result().getType(); + p << " : " << op.getType(); } static LogicalResult verify(TensorFromElementsOp op) { @@ -1778,6 +1778,14 @@ static LogicalResult verify(TensorFromElementsOp op) { return success(); } +void TensorFromElementsOp::build(OpBuilder &builder, OperationState &result, + ValueRange elements) { + assert(!elements.empty() && "expected at least one element"); + result.addOperands(elements); + result.addTypes(RankedTensorType::get({static_cast(elements.size())}, + *elements.getTypes().begin())); +} + namespace { // Canonicalizes the pattern of the form diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index bf8e74e5143ed0..4d2437a4877bc2 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -94,7 +94,7 @@ func @const_shape() -> tensor { // CHECK: %[[C1:.*]] = constant 1 : index // CHECK: %[[C2:.*]] = constant 2 : index // CHECK: %[[C3:.*]] = constant 3 : index - // CHECK: %[[TENSOR3:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) + // CHECK: %[[TENSOR3:.*]] 
= tensor_from_elements %[[C1]], %[[C2]], %[[C3]] // CHECK: %[[RESULT:.*]] = tensor_cast %[[TENSOR3]] : tensor<3xindex> to tensor // CHECK: return %[[RESULT]] : tensor %shape = shape.const_shape [1, 2, 3] : tensor @@ -223,7 +223,7 @@ func @shape_of_stat(%arg : tensor<1x2x3xf32>) { // CHECK-DAG: %[[C1:.*]] = constant 1 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[C3:.*]] = constant 3 : index - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C2]], %[[C3]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C2]], %[[C3]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x2x3xf32> -> tensor return } @@ -238,7 +238,7 @@ func @shape_of_dyn(%arg : tensor<1x5x?xf32>) { // CHECK-DAG: %[[C5:.*]] = constant 5 : index // CHECK-DAG: %[[C2:.*]] = constant 2 : index // CHECK-DAG: %[[DYN_DIM:.*]] = dim %[[ARG]], %[[C2]] : tensor<1x5x?xf32> - // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements(%[[C1]], %[[C5]], %[[DYN_DIM]]) : tensor<3xindex> + // CHECK-DAG: %[[SHAPE_UNCASTED:.*]] = tensor_from_elements %[[C1]], %[[C5]], %[[DYN_DIM]] : tensor<3xindex> %shape = shape.shape_of %arg : tensor<1x5x?xf32> -> tensor return } diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 69e974bc41734d..e4472b444f0344 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -661,17 +661,17 @@ func @extract_element(%arg0: tensor<*xi32>, %arg1 : tensor<4x4xf32>) -> i32 { // CHECK-LABEL: func @tensor_from_elements() { func @tensor_from_elements() { %c0 = "std.constant"() {value = 0: index} : () -> index - // CHECK: %0 = tensor_from_elements(%c0) : tensor<1xindex> - %0 = tensor_from_elements(%c0) : tensor<1xindex> + // CHECK: %0 = tensor_from_elements %c0 : tensor<1xindex> + %0 = tensor_from_elements %c0 : tensor<1xindex> %c1 = "std.constant"() {value = 1: index} : () -> index - // CHECK: %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex> - %1 = tensor_from_elements(%c0, %c1) : tensor<2xindex> + // CHECK: %1 = tensor_from_elements %c0, %c1 : tensor<2xindex> + %1 = tensor_from_elements %c0, %c1 : tensor<2xindex> %c0_f32 = "std.constant"() {value = 0.0: f32} : () -> f32 // CHECK: [[C0_F32:%.*]] = constant - // CHECK: %2 = tensor_from_elements([[C0_F32]]) : tensor<1xf32> - %2 = tensor_from_elements(%c0_f32) : tensor<1xf32> + // CHECK: %2 = tensor_from_elements [[C0_F32]] : tensor<1xf32> + %2 = tensor_from_elements %c0_f32 : tensor<1xf32> return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 55739119aa26d6..71b007ef6e39f3 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -597,7 +597,7 @@ func @extract_element_tensor_too_few_indices(%t : tensor<2x3xf32>, %i : index) { func @tensor_from_elements_wrong_result_type() { // expected-error@+2 {{expected result type to be a ranked tensor}} %c0 = constant 0 : i32 - %0 = tensor_from_elements(%c0) : tensor<*xi32> + %0 = tensor_from_elements %c0 : tensor<*xi32> return } @@ -606,7 +606,7 @@ func @tensor_from_elements_wrong_result_type() { func @tensor_from_elements_wrong_elements_count() { // expected-error@+2 {{expected result type to be a 1D tensor with 1 element}} %c0 = constant 0 : index - %0 = tensor_from_elements(%c0) : tensor<2xindex> + %0 = tensor_from_elements %c0 : tensor<2xindex> return } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index 7333446c6e5d93..76fe82588be3e8 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ 
b/mlir/test/Transforms/canonicalize.mlir @@ -981,7 +981,7 @@ func @memref_cast_folding_subview_static(%V: memref<16x16xf32>, %a: index, %b: i func @extract_element_from_tensor_from_elements(%element : index) -> index { // CHECK-SAME: ([[ARG:%.*]]: index) %c0 = constant 0 : index - %tensor = tensor_from_elements(%element) : tensor<1xindex> + %tensor = tensor_from_elements %element : tensor<1xindex> %extracted_element = extract_element %tensor[%c0] : tensor<1xindex> // CHECK: [[ARG]] : index return %extracted_element : index From 5106a8b8f8d0d3dd6c3fc0554f05402d8d9177ef Mon Sep 17 00:00:00 2001 From: Frederik Gossen Date: Wed, 9 Sep 2020 07:53:13 +0000 Subject: [PATCH 154/161] [MLIR][Shape] Lower `shape_of` to `dynamic_tensor_from_elements` Take advantage of the new `dynamic_tensor_from_elements` operation in `std`. Instead of stack-allocated memory, we can now lower directly to a single `std` operation. Differential Revision: https://reviews.llvm.org/D86935 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 7 +++++ .../ShapeToStandard/ShapeToStandard.cpp | 27 +++++++------------ mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 16 +++++++++++ .../ShapeToStandard/shape-to-standard.mlir | 13 ++++----- 4 files changed, 37 insertions(+), 26 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index c276818589afe2..44bbb423b2d950 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1504,6 +1504,13 @@ def DynamicTensorFromElementsOp : Std_Op<"dynamic_tensor_from_elements", let arguments = (ins Variadic:$dynamicExtents); let results = (outs AnyRankedTensor:$result); let regions = (region SizedRegion<1>:$body); + + let builders = [ + // Build op and populate its body per callback function. + OpBuilder<"OpBuilder &b, OperationState &result, Type resultTy, " + "ValueRange dynamicExtents, " + "function_ref">, + ]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp index 8c917e08f942cd..f3f11e89af02fb 100644 --- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp +++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp @@ -422,6 +422,7 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return failure(); // For ranked tensor arguments, lower to `tensor_from_elements`. + auto loc = op.getLoc(); ShapeOfOp::Adaptor transformed(operands); Value tensor = transformed.arg(); Type tensorTy = tensor.getType(); @@ -431,7 +432,6 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( SmallVector extentValues; RankedTensorType rankedTensorTy = tensorTy.cast(); int64_t rank = rankedTensorTy.getRank(); - auto loc = op.getLoc(); for (int64_t i = 0; i < rank; i++) { if (rankedTensorTy.isDynamicDim(i)) { Value extent = rewriter.create(loc, tensor, i); @@ -451,26 +451,17 @@ LogicalResult ShapeOfOpConversion::matchAndRewrite( return success(); } - // Allocate stack memory. - auto loc = op.getLoc(); + // Lower to `dynamic_tensor_from_elements` otherwise. + auto *ctx = rewriter.getContext(); Value rank = rewriter.create(loc, tensor); - Type indexTy = rewriter.getIndexType(); - Type memTy = MemRefType::get({ShapedType::kDynamicSize}, indexTy); - Value mem = rewriter.create(loc, memTy, ValueRange{rank}); - - // Copy shape extents to stack-allocated memory. 
- Value zero = rewriter.create(loc, 0); - Value one = rewriter.create(loc, 1); - rewriter.create( - loc, zero, rank, one, llvm::None, - [&](OpBuilder &b, Location loc, Value iv, ValueRange args) { - Value dim = rewriter.create(loc, tensor, iv); - rewriter.create(loc, dim, mem, ValueRange{iv}); - rewriter.create(loc); + rewriter.replaceOpWithNewOp( + op, getExtentTensorType(ctx), ValueRange{rank}, + [&](OpBuilder &b, Location loc, ValueRange args) { + Value dim = args.front(); + Value extent = b.create(loc, tensor, dim); + b.create(loc, extent); }); - // Load extents to tensor value. - rewriter.replaceOpWithNewOp(op.getOperation(), mem); return success(); } diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 1c69019870198c..a0ad05852e230d 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1694,6 +1694,22 @@ static LogicalResult verify(DynamicTensorFromElementsOp op) { return success(); } +void DynamicTensorFromElementsOp::build( + OpBuilder &b, OperationState &result, Type resultTy, + ValueRange dynamicExtents, + function_ref bodyBuilder) { + build(b, result, resultTy, dynamicExtents); + + // Build and populate body. + OpBuilder::InsertionGuard guard(b); + Region *bodyRegion = result.regions.front().get(); + auto rank = resultTy.cast().getRank(); + SmallVector argumentTypes(rank, b.getIndexType()); + Block *bodyBlock = + b.createBlock(bodyRegion, bodyRegion->end(), argumentTypes); + bodyBuilder(b, result.location, bodyBlock->getArguments()); +} + //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir index 4d2437a4877bc2..4168634f1240dd 100644 --- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir +++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir @@ -191,14 +191,11 @@ func @shape_of(%arg : tensor<*xf32>) { // CHECK-SAME: (%[[ARG:.*]]: tensor<*xf32>) func @shape_of_unranked(%arg : tensor<*xf32>) { // CHECK: %[[RANK:.*]] = rank %[[ARG]] : tensor<*xf32> - // CHECK: %[[SHAPE_MEM:.*]] = alloca(%[[RANK]]) : memref - // CHECK: %[[C0:.*]] = constant 0 : index - // CHECK: %[[C1:.*]] = constant 1 : index - // CHECK: scf.for %[[I:.*]] = %[[C0]] to %[[RANK]] step %[[C1]] { - // CHECK: %[[DIM:.]] = dim %[[ARG]], %[[I]] : tensor<*xf32> - // CHECK: store %[[DIM]], %[[SHAPE_MEM]][%[[I]]] : memref - // CHECK: } - // CHECK: %[[SHAPE:.*]] = tensor_load %[[SHAPE_MEM]] : memref + // CHECK: %[[SHAPE:.*]] = dynamic_tensor_from_elements %[[RANK]] { + // CHECK: ^bb0(%[[I:.*]]: index): + // CHECK: %[[EXTENT:.*]] = dim %[[ARG]], %[[I]] : tensor<*xf32> + // CHECK: yield %[[EXTENT]] : index + // CHECK: } : tensor %shape = shape.shape_of %arg : tensor<*xf32> -> tensor return } From 32c8da41dc0cb99651823a1a21130c2cbdf688e1 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 09:54:47 +0200 Subject: [PATCH 155/161] [lldb] Don't infinite loop in SemaSourceWithPriorities::CompleteType when trying to complete a forward decl SemaSourceWithPriorities is a special SemaSource that wraps our normal LLDB ExternalASTSource and the ASTReader (which is used for the C++ module loading). It's only active when the `import-std-module` setting is turned on. 
The `CompleteType` function there in `SemaSourceWithPriorities` is looping over all ExternalASTSources and asks each to complete the type. However, that loop is in another loop that keeps doing that until the type is complete. If that function is ever called on a type that is a forward decl then that causes LLDB to go into an infinite loop. I remember I added that second loop and the comment because I thought I saw a similar pattern in some other Clang code, but after some grepping I can't find that code anywhere and it seems the rest of the code base only calls CompleteType once (It would also be kinda silly to have calling it multiple times). So it seems that's just a silly mistake. The is implicitly tested by importing `std::pair`, but I also added a simpler dedicated test that creates a dummy libc++ module with some forward declarations and then imports them into the scratch AST context. At some point the ASTImporter will check if one of the forward decls could be completed by the ExternalASTSource, which will cause the `SemaSourceWithPriorities` to go into an infinite loop once it receives the `CompleteType` call. Reviewed By: shafik Differential Revision: https://reviews.llvm.org/D87289 --- .../Plugins/ExpressionParser/Clang/ASTUtils.h | 15 +++---- .../forward_decl_from_module/Makefile | 9 +++++ .../TestForwardDeclFromStdModule.py | 39 +++++++++++++++++++ .../forward_decl_from_module/main.cpp | 8 ++++ .../root/usr/include/c++/v1/module.modulemap | 3 ++ .../root/usr/include/c++/v1/vector | 14 +++++++ .../root/usr/include/libc_header.h | 1 + 7 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector create mode 100644 lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h index 769b18d54cedd6..b70ec223df4dff 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.h @@ -359,15 +359,12 @@ class SemaSourceWithPriorities : public clang::ExternalSemaSource { } void CompleteType(clang::TagDecl *Tag) override { - while (!Tag->isCompleteDefinition()) - for (size_t i = 0; i < Sources.size(); ++i) { - // FIXME: We are technically supposed to loop here too until - // Tag->isCompleteDefinition() is true, but if our low quality source - // is failing to complete the tag this code will deadlock. - Sources[i]->CompleteType(Tag); - if (Tag->isCompleteDefinition()) - break; - } + for (clang::ExternalSemaSource *S : Sources) { + S->CompleteType(Tag); + // Stop after the first source completed the type. 
+ if (Tag->isCompleteDefinition()) + break; + } } void CompleteType(clang::ObjCInterfaceDecl *Class) override { diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile new file mode 100644 index 00000000000000..4915cdae876413 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/Makefile @@ -0,0 +1,9 @@ +# We don't have any standard include directories, so we can't +# parse the test_common.h header we usually inject as it includes +# system headers. +NO_TEST_COMMON_H := 1 + +CXXFLAGS_EXTRAS = -I $(SRCDIR)/root/usr/include/c++/v1/ -I $(SRCDIR)/root/usr/include/ -nostdinc -nostdinc++ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py new file mode 100644 index 00000000000000..48459abb926686 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/TestForwardDeclFromStdModule.py @@ -0,0 +1,39 @@ +""" +Tests forward declarations coming from the `std` module. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import os + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + # We only emulate a fake libc++ in this test and don't use the real libc++, + # but we still add the libc++ category so that this test is only run in + # test configurations where libc++ is actually supposed to be tested. + @add_test_categories(["libc++"]) + @skipIfRemote + @skipIf(compiler=no_match("clang")) + def test(self): + self.build() + + sysroot = os.path.join(os.getcwd(), "root") + + # Set the sysroot where our dummy libc++ exists. + self.runCmd("platform select --sysroot '" + sysroot + "' host", CURRENT_EXECUTABLE_SET) + + lldbutil.run_to_source_breakpoint(self, + "// Set break point at this line.", lldb.SBFileSpec("main.cpp")) + + self.runCmd("settings set target.import-std-module true") + + # Print the dummy `std::vector`. It only has the dummy member in it + # so the standard `std::vector` formatter can't format it. Instead use + # the raw output so LLDB has to show the member variable. + # Both `std::vector` and the type of the member have forward + # declarations before their definitions. + self.expect("expr --raw -- v", + substrs=['(std::__1::vector) $0 = {', 'f = 0x', '}']) diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp new file mode 100644 index 00000000000000..a0b02d5c68141b --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/main.cpp @@ -0,0 +1,8 @@ +#include + +int main(int argc, char **argv) { + // Makes sure we have the mock libc headers in the debug information. + libc_struct s; + std::vector v; + return 0; // Set break point at this line. 
+} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap new file mode 100644 index 00000000000000..f149be7b7d21ac --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/module.modulemap @@ -0,0 +1,3 @@ +module std { + module "vector" { header "vector" export * } +} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector new file mode 100644 index 00000000000000..c2d77aab071109 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/c++/v1/vector @@ -0,0 +1,14 @@ +#include "libc_header.h" + +namespace std { + inline namespace __1 { + // A forward decl of `vector`. + template class vector; + // Pretend to be a std::vector template we need to instantiate in LLDB + // when import-std-module is enabled. + template + struct vector { class F; F *f; }; + // The definition of our forward declared nested class. + template class vector::F { int x; }; + } +} diff --git a/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h new file mode 100644 index 00000000000000..47525c9db3467f --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/forward_decl_from_module/root/usr/include/libc_header.h @@ -0,0 +1 @@ +struct libc_struct {}; From 2bcc4db761768f1b7431237920f26360549ca268 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 9 Sep 2020 09:00:41 +0100 Subject: [PATCH 156/161] [EarlyCSE] Explicitly require AAResultsWrapperPass. The MemorySSAWrapperPass depends on AAResultsWrapperPass and if MemorySSA is preserved but AAResultsWrapperPass is not, this could lead to a crash when updating the last user of the MemorySSAWrapperPass. Alternatively AAResultsWrapperPass could be marked preserved by GVN, but I am not sure if that would be safe. I am not sure what is required in order to preserve AAResultsWrapperPass. At the moment, it seems like a couple of passes that do similar transforms to GVN are preserving it. 
Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D87137 --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 2 ++ llvm/lib/Transforms/Scalar/GVN.cpp | 1 - llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 51da10fc48790d..b655204d26dd24 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1463,6 +1463,7 @@ class EarlyCSELegacyCommonPass : public FunctionPass { AU.addRequired(); AU.addRequired(); if (UseMemorySSA) { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); } @@ -1504,6 +1505,7 @@ INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa", "Early CSE w/ MemorySSA", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index c71038d66f9956..036ca1d1054fee 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -2850,7 +2850,6 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { if (Impl.isMemDepEnabled()) AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); diff --git a/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll new file mode 100644 index 00000000000000..744389c24db284 --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/reuse-preserved-memoryssa.ll @@ -0,0 +1,7 @@ +; RUN: opt -memoryssa -gvn -early-cse-memssa %s -S | FileCheck %s + +; CHECK: define void @foo( + +define void @foo() { + ret void +} From 7866b91405693df5b4cf6ba770b3a92d48b0c508 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 10:16:56 +0200 Subject: [PATCH 157/161] [lldb] Fix a crash when the ASTImporter is giving us two Imported callbacks for the same target decl The ASTImporter has an `Imported(From, To)` callback that notifies subclasses that a declaration has been imported in some way. LLDB uses this in the `CompleteTagDeclsScope` to see which records have been imported into the scratch context. If the record was declared inside the expression, then the `CompleteTagDeclsScope` will forcibly import the full definition of that record to the scratch context so that the expression AST can safely be disposed later (otherwise we might end up going back to the deleted AST to complete the minimally imported record). The way this is implemented is that there is a list of decls that need to be imported (`m_decls_to_complete`) and we keep completing the declarations inside that list until the list is empty. Every `To` Decl we get via the `Imported` callback will be added to the list of Decls to be completed. There are some situations where the ASTImporter will actually give us two `Imported` calls with the same `To` Decl. One way where this happens is if the ASTImporter decides to merge an imported definition into an already imported one. Another way is that the ASTImporter just happens to get two calls to `ASTImporter::Import` for the same Decl. 
This for example happens when importing the DeclContext of a Decl requires importing the Decl itself, such as when importing a RecordDecl that was declared inside a function. The bug addressed in this patch is that when we end up getting two `Imported` calls for the same `To` Decl, then we would crash in the `CompleteTagDeclsScope`. That's because the first time we complete the Decl we remove the Origin tracking information (that maps the Decl back to from where it came from). The next time we try to complete the same `To` Decl the Origin tracking information is gone and we hit the `to_context_md->getOrigin(decl).ctx == m_src_ctx` assert (`getOrigin(decl).ctx` is a nullptr the second time as the Origin was deleted). This is actually a regression coming from D72495. Before D72495 `m_decls_to_complete` was actually a set so every declaration in there could only be queued once to be completed. The set was changed to a vector to make the iteration over it deterministic, but that also causes that we now potentially end up trying to complete a Decl twice. This patch essentially just reverts D72495 and makes the `CompleteTagDeclsScope` use a SetVector for the list of declarations to be completed. The SetVector should filter out the duplicates (as the original `set` did) and also ensure that the completion order is deterministic. I actually couldn't find any way to cause LLDB to reproduce this bug by merging declarations (this would require that we for example declare two namespaces in a non-top-level expression which isn't possible). But the bug reproduces very easily by just declaring a class in an expression, so that's what the test is doing. Reviewed By: shafik Differential Revision: https://reviews.llvm.org/D85648 --- .../Clang/ClangASTImporter.cpp | 13 +++++-- .../TestRecordDeclInExpr.py | 34 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 73042c205a5ae7..e2601a059bb77f 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -216,7 +216,12 @@ namespace { /// imported while completing the original Decls). class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { ClangASTImporter::ImporterDelegateSP m_delegate; - llvm::SmallVector m_decls_to_complete; + /// List of declarations in the target context that need to be completed. + /// Every declaration should only be completed once and therefore should only + /// be once in this list. + llvm::SetVector m_decls_to_complete; + /// Set of declarations that already were successfully completed (not just + /// added to m_decls_to_complete). llvm::SmallPtrSet m_decls_already_completed; clang::ASTContext *m_dst_ctx; clang::ASTContext *m_src_ctx; @@ -244,6 +249,9 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *decl = m_decls_to_complete.pop_back_val(); m_decls_already_completed.insert(decl); + // The decl that should be completed has to be imported into the target + // context from some other context. + assert(to_context_md->hasOrigin(decl)); // We should only complete decls coming from the source context. 
assert(to_context_md->getOrigin(decl).ctx == m_src_ctx); @@ -287,7 +295,8 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { // Check if we already completed this type. if (m_decls_already_completed.count(to_named_decl) != 0) return; - m_decls_to_complete.push_back(to_named_decl); + // Queue this type to be completed. + m_decls_to_complete.insert(to_named_decl); } }; } // namespace diff --git a/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py new file mode 100644 index 00000000000000..16bf098dce8f31 --- /dev/null +++ b/lldb/test/API/lang/c/record_decl_in_expr/TestRecordDeclInExpr.py @@ -0,0 +1,34 @@ +""" +Tests declaring RecordDecls in non-top-level expressions. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test_fwd_decl(self): + # Declare a forward decl and import it to the scratch AST. + self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + + @no_debug_info_test + def test_struct(self): + # Declare a struct and import it to the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl(self): + # Import the forward decl to the scratch AST. + self.expect_expr("struct S; S *s = nullptr; s", result_type="S *") + # Merge the definition into the scratch AST. + self.expect("expr struct S {}; S s; s", substrs=["= {}"]) + + @no_debug_info_test + def test_struct_with_fwd_decl_same_expr(self): + # Test both a forward decl and a definition in one expression and + # import them into the scratch AST. 
+ self.expect("expr struct S; struct S{}; S s; s", substrs=["= {}"]) From 37a7c0a00773f135d909eb9eba7f82547aee1e89 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 9 Sep 2020 15:44:25 +0700 Subject: [PATCH 158/161] [Test] Add failing test for pr47457 --- llvm/test/Transforms/LoopLoadElim/pr47457.ll | 45 ++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 llvm/test/Transforms/LoopLoadElim/pr47457.ll diff --git a/llvm/test/Transforms/LoopLoadElim/pr47457.ll b/llvm/test/Transforms/LoopLoadElim/pr47457.ll new file mode 100644 index 00000000000000..1b102944cd767a --- /dev/null +++ b/llvm/test/Transforms/LoopLoadElim/pr47457.ll @@ -0,0 +1,45 @@ +; RUN: opt -loop-load-elim -S %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S %s | FileCheck %s +; REQUIRES: asserts +; XFAIL: * + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +define void @test() { +; CHECK-LABEL: test + +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb1, %bb + %tmp = phi i32 [ undef, %bb ], [ 0, %bb1 ], [ %tmp3, %bb6 ] + br i1 undef, label %bb1, label %bb2 + +bb2: ; preds = %bb1 + %tmp3 = add i32 %tmp, 1 + %tmp4 = icmp ult i32 %tmp, undef + br i1 %tmp4, label %bb6, label %bb5 + +bb5: ; preds = %bb2 + ret void + +bb6: ; preds = %bb2 + br i1 undef, label %bb7, label %bb1 + +bb7: ; preds = %bb7, %bb6 + %tmp8 = phi i32 [ %tmp15, %bb7 ], [ %tmp3, %bb6 ] + %tmp9 = phi i32 [ %tmp8, %bb7 ], [ %tmp, %bb6 ] + %tmp10 = zext i32 %tmp9 to i64 + %tmp11 = getelementptr inbounds float, float addrspace(1)* null, i64 %tmp10 + %tmp12 = load float, float addrspace(1)* %tmp11, align 4 + %tmp13 = zext i32 %tmp8 to i64 + %tmp14 = getelementptr inbounds float, float addrspace(1)* null, i64 %tmp13 + store float 1.000000e+00, float addrspace(1)* %tmp14, align 4 + %tmp15 = add nuw nsw i32 %tmp8, 1 + %tmp16 = icmp sgt i32 %tmp8, 78 + br i1 %tmp16, label %bb17, label %bb7 + +bb17: ; preds = %bb7 + unreachable +} From b85222520f861a1812f991d6bd65950dda22f31b Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Wed, 9 Sep 2020 10:35:56 +0200 Subject: [PATCH 159/161] [lldb] Enable std::pair in CxxModuleHandler This adds support for substituting std::pair instantiations with enabled import-std-module. With the fixes in parent revisions we can currently substitute a single pair (however, a result that returns a second pair currently causes LLDB to crash while importing the second template instantiation). 
Reviewed By: aprantl Differential Revision: https://reviews.llvm.org/D85141 --- .../Clang/CxxModuleHandler.cpp | 1 + .../import-std-module/pair/Makefile | 3 +++ .../pair/TestPairFromStdModule.py | 25 +++++++++++++++++++ .../import-std-module/pair/main.cpp | 6 +++++ 4 files changed, 35 insertions(+) create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/Makefile create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py create mode 100644 lldb/test/API/commands/expression/import-std-module/pair/main.cpp diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp index 2f8cf1846ee774..38d9f8d1e4b805 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp @@ -34,6 +34,7 @@ CxxModuleHandler::CxxModuleHandler(ASTImporter &importer, ASTContext *target) "weak_ptr", // utility "allocator", + "pair", }; m_supported_templates.insert(supported_names.begin(), supported_names.end()); } diff --git a/lldb/test/API/commands/expression/import-std-module/pair/Makefile b/lldb/test/API/commands/expression/import-std-module/pair/Makefile new file mode 100644 index 00000000000000..f938f7428468ab --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/Makefile @@ -0,0 +1,3 @@ +USE_LIBCPP := 1 +CXX_SOURCES := main.cpp +include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py new file mode 100644 index 00000000000000..4f5b1ea8028b07 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/TestPairFromStdModule.py @@ -0,0 +1,25 @@ +""" +Test basic std::pair functionality. +""" + +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @add_test_categories(["libc++"]) + @skipIf(compiler=no_match("clang")) + def test(self): + self.build() + + lldbutil.run_to_source_breakpoint(self, + "// Set break point at this line.", lldb.SBFileSpec("main.cpp")) + + self.runCmd("settings set target.import-std-module true") + + self.expect_expr("pair_int.first", result_type="int", result_value="1234") + self.expect_expr("pair_int.second", result_type="int", result_value="5678") + self.expect("expr pair_int", substrs=['first = 1234, second = 5678']) \ No newline at end of file diff --git a/lldb/test/API/commands/expression/import-std-module/pair/main.cpp b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp new file mode 100644 index 00000000000000..1363698f1fc7f6 --- /dev/null +++ b/lldb/test/API/commands/expression/import-std-module/pair/main.cpp @@ -0,0 +1,6 @@ +#include + +int main(int argc, char **argv) { + std::pair pair_int(1234, 5678); + return 0; // Set break point at this line. +} From feb0b9c3bba7db6d547b552c3cdaa838559da664 Mon Sep 17 00:00:00 2001 From: Marcel Koester Date: Fri, 7 Aug 2020 12:22:45 +0200 Subject: [PATCH 160/161] [mlir] Added support for loops to BufferPlacement transformation. The current BufferPlacement transformation cannot handle loops properly. Buffers passed via backedges will not be freed automatically introducing memory leaks. This CL adds support for loops to overcome these limitations. 
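As a rough sketch of the intended behavior (adapted from the loop test cases
added in this patch, not an exact rendering of the pass output): a buffer
allocated inside an scf.for body and passed out via the yield now causes the
pass to free the incoming iteration buffer and to yield a copy of the fresh
allocation, so nothing leaks across the backedge.

  func @loop_alloc(%lb: index, %ub: index, %step: index,
                   %buf: memref<2xf32>, %res: memref<2xf32>) {
    %0 = alloc() : memref<2xf32>
    %1 = scf.for %i = %lb to %ub step %step
        iter_args(%iterBuf = %buf) -> memref<2xf32> {
      %2 = cmpi "eq", %i, %ub : index
      %3 = alloc() : memref<2xf32>
      scf.yield %3 : memref<2xf32>
    }
    "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> ()
    return
  }
  // After buffer placement (sketch): %iterBuf is deallocated inside the body,
  // %3 is copied into a temporary buffer that is yielded instead, and the
  // loop result is deallocated after the final linalg.copy.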
Differential Revision: https://reviews.llvm.org/D85513 --- mlir/lib/Transforms/BufferPlacement.cpp | 236 +++++++++++++---- mlir/test/Transforms/buffer-placement.mlir | 292 +++++++++++++++++++++ 2 files changed, 474 insertions(+), 54 deletions(-) diff --git a/mlir/lib/Transforms/BufferPlacement.cpp b/mlir/lib/Transforms/BufferPlacement.cpp index 0279129758ab83..9f2c254f91e513 100644 --- a/mlir/lib/Transforms/BufferPlacement.cpp +++ b/mlir/lib/Transforms/BufferPlacement.cpp @@ -48,11 +48,10 @@ // will be freed in the end. // // TODO: -// The current implementation does not support loops and the resulting code will -// be invalid with respect to program semantics. The only thing that is -// currently missing is a high-level loop analysis that allows us to move allocs -// and deallocs outside of the loop blocks. Furthermore, it doesn't also accept -// functions which return buffers already. +// The current implementation does not support explicit-control-flow loops and +// the resulting code will be invalid with respect to program semantics. +// However, structured control-flow loops are fully supported. Furthermore, it +// doesn't accept functions which return buffers already. // //===----------------------------------------------------------------------===// @@ -77,6 +76,22 @@ static void walkReturnOperations(Region *region, const FuncT &func) { } } +/// Wrapper for the actual `RegionBranchOpInterface.getSuccessorRegions` +/// function that initializes the required `operandAttributes` array. +static void getSuccessorRegions(RegionBranchOpInterface regionInterface, + llvm::Optional index, + SmallVectorImpl &successors) { + // Create a list of null attributes for each operand to comply with the + // `getSuccessorRegions` interface definition that requires a single + // attribute per operand. + SmallVector operandAttributes( + regionInterface.getOperation()->getNumOperands()); + + // Get all successor regions using the temporarily allocated + // `operandAttributes`. + regionInterface.getSuccessorRegions(index, operandAttributes, successors); +} + namespace { //===----------------------------------------------------------------------===// // BufferPlacementAliasAnalysis @@ -166,16 +181,10 @@ class BufferPlacementAliasAnalysis { // Query the RegionBranchOpInterface to find potential successor regions. op->walk([&](RegionBranchOpInterface regionInterface) { - // Create an empty attribute for each operand to comply with the - // `getSuccessorRegions` interface definition that requires a single - // attribute per operand. - SmallVector operandAttributes( - regionInterface.getOperation()->getNumOperands()); - // Extract all entry regions and wire all initial entry successor inputs. SmallVector entrySuccessors; - regionInterface.getSuccessorRegions(/*index=*/llvm::None, - operandAttributes, entrySuccessors); + getSuccessorRegions(regionInterface, /*index=*/llvm::None, + entrySuccessors); for (RegionSuccessor &entrySuccessor : entrySuccessors) { // Wire the entry region's successor arguments with the initial // successor inputs. @@ -191,8 +200,8 @@ class BufferPlacementAliasAnalysis { // Iterate over all successor region entries that are reachable from the // current region. 
SmallVector successorRegions; - regionInterface.getSuccessorRegions( - region.getRegionNumber(), operandAttributes, successorRegions); + getSuccessorRegions(regionInterface, region.getRegionNumber(), + successorRegions); for (RegionSuccessor &successorRegion : successorRegions) { // Iterate over all immediate terminator operations and wire the // successor inputs with the operands of each terminator. @@ -209,6 +218,83 @@ class BufferPlacementAliasAnalysis { ValueMapT aliases; }; +//===----------------------------------------------------------------------===// +// Backedges +//===----------------------------------------------------------------------===// + +/// A straight-forward program analysis which detects loop backedges induced by +/// explicit control flow. +class Backedges { +public: + using BlockSetT = SmallPtrSet; + using BackedgeSetT = llvm::DenseSet>; + +public: + /// Constructs a new backedges analysis using the op provided. + Backedges(Operation *op) { recurse(op, op->getBlock()); } + + /// Returns the number of backedges formed by explicit control flow. + size_t size() const { return edgeSet.size(); } + + /// Returns the start iterator to loop over all backedges. + BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); } + + /// Returns the end iterator to loop over all backedges. + BackedgeSetT::const_iterator end() const { return edgeSet.end(); } + +private: + /// Enters the current block and inserts a backedge into the `edgeSet` if we + /// have already visited the current block. The inserted edge links the given + /// `predecessor` with the `current` block. + bool enter(Block ¤t, Block *predecessor) { + bool inserted = visited.insert(¤t).second; + if (!inserted) + edgeSet.insert(std::make_pair(predecessor, ¤t)); + return inserted; + } + + /// Leaves the current block. + void exit(Block ¤t) { visited.erase(¤t); } + + /// Recurses into the given operation while taking all attached regions into + /// account. + void recurse(Operation *op, Block *predecessor) { + Block *current = op->getBlock(); + // If the current op implements the `BranchOpInterface`, there can be + // cycles in the scope of all successor blocks. + if (isa(op)) { + for (Block *succ : current->getSuccessors()) + recurse(*succ, current); + } + // Recurse into all distinct regions and check for explicit control-flow + // loops. + for (Region ®ion : op->getRegions()) + recurse(region.front(), current); + } + + /// Recurses into explicit control-flow structures that are given by + /// the successor relation defined on the block level. + void recurse(Block &block, Block *predecessor) { + // Try to enter the current block. If this is not possible, we are + // currently processing this block and can safely return here. + if (!enter(block, predecessor)) + return; + + // Recurse into all operations and successor blocks. + for (auto &op : block.getOperations()) + recurse(&op, predecessor); + + // Leave the current block. + exit(block); + } + + /// Stores all blocks that are currently visited and on the processing stack. + BlockSetT visited; + + /// Stores all backedges in the format (source, target). + BackedgeSetT edgeSet; +}; + //===----------------------------------------------------------------------===// // BufferPlacement //===----------------------------------------------------------------------===// @@ -357,9 +443,14 @@ class BufferPlacement { for (Value value : it->second) { if (valuesToFree.count(value) > 0) continue; - // Check whether we have to free this particular block argument. 
- if (!dominators.dominates(definingBlock, value.getParentBlock())) { - toProcess.emplace_back(value, value.getParentBlock()); + Block *parentBlock = value.getParentBlock(); + // Check whether we have to free this particular block argument or + // generic value. We have to free the current alias if it is either + // defined in a non-dominated block or it is defined in the same block + // but the current value is not dominated by the source value. + if (!dominators.dominates(definingBlock, parentBlock) || + (definingBlock == parentBlock && value.isa())) { + toProcess.emplace_back(value, parentBlock); valuesToFree.insert(value); } else if (visitedValues.insert(std::make_tuple(value, definingBlock)) .second) @@ -431,22 +522,42 @@ class BufferPlacement { // argument belongs to the first block in a region and the parent operation // implements the RegionBranchOpInterface. Region *argRegion = block->getParent(); + Operation *parentOp = argRegion->getParentOp(); RegionBranchOpInterface regionInterface; if (!argRegion || &argRegion->front() != block || - !(regionInterface = - dyn_cast(argRegion->getParentOp()))) + !(regionInterface = dyn_cast(parentOp))) return; introduceCopiesForRegionSuccessors( - regionInterface, argRegion->getParentOp()->getRegions(), + regionInterface, argRegion->getParentOp()->getRegions(), blockArg, [&](RegionSuccessor &successorRegion) { // Find a predecessor of our argRegion. return successorRegion.getSuccessor() == argRegion; - }, - [&](RegionSuccessor &successorRegion) { - // The operand index will be the argument number. - return blockArg.getArgNumber(); }); + + // Check whether the block argument belongs to an entry region of the + // parent operation. In this case, we have to introduce an additional copy + // for buffer that is passed to the argument. + SmallVector successorRegions; + getSuccessorRegions(regionInterface, llvm::None, successorRegions); + auto *it = + llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) { + return successorRegion.getSuccessor() == argRegion; + }); + if (it == successorRegions.end()) + return; + + // Determine the actual operand to introduce a copy for and rewire the + // operand to point to the copy instead. + Value operand = + regionInterface.getSuccessorEntryOperands(argRegion->getRegionNumber()) + [llvm::find(it->getSuccessorInputs(), blockArg).getIndex()]; + Value copy = introduceBufferCopy(operand, parentOp); + + auto op = llvm::find(parentOp->getOperands(), operand); + assert(op != parentOp->getOperands().end() && + "parentOp does not contain operand"); + parentOp->setOperand(op.getIndex(), copy); } /// Introduces temporary allocs in front of all associated nested-region @@ -455,42 +566,34 @@ class BufferPlacement { // Get the actual result index in the scope of the parent terminator. Operation *operation = value.getDefiningOp(); auto regionInterface = cast(operation); - introduceCopiesForRegionSuccessors( - regionInterface, operation->getRegions(), - [&](RegionSuccessor &successorRegion) { - // Determine whether this region has a successor entry that leaves - // this region by returning to its parent operation. - return !successorRegion.getSuccessor(); - }, - [&](RegionSuccessor &successorRegion) { - // Find the associated success input index. - return llvm::find(successorRegion.getSuccessorInputs(), value) - .getIndex(); - }); + // Filter successors that return to the parent operation. 
+ auto regionPredicate = [&](RegionSuccessor &successorRegion) { + // If the RegionSuccessor has no associated successor, it will return to + // its parent operation. + return !successorRegion.getSuccessor(); + }; + // Introduce a copy for all region "results" that are returned to the parent + // operation. This is required since the parent's result value has been + // considered critical. Therefore, the algorithm assumes that a copy of a + // previously allocated buffer is returned by the operation (like in the + // case of a block argument). + introduceCopiesForRegionSuccessors(regionInterface, operation->getRegions(), + value, regionPredicate); } /// Introduces buffer copies for all terminators in the given regions. The /// regionPredicate is applied to every successor region in order to restrict - /// the copies to specific regions. Thereby, the operandProvider is invoked - /// for each matching region successor and determines the operand index that - /// requires a buffer copy. - template - void - introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface, - MutableArrayRef regions, - const TPredicate ®ionPredicate, - const TOperandProvider &operandProvider) { - // Create an empty attribute for each operand to comply with the - // `getSuccessorRegions` interface definition that requires a single - // attribute per operand. - SmallVector operandAttributes( - regionInterface.getOperation()->getNumOperands()); + /// the copies to specific regions. + template + void introduceCopiesForRegionSuccessors( + RegionBranchOpInterface regionInterface, MutableArrayRef regions, + Value argValue, const TPredicate ®ionPredicate) { for (Region ®ion : regions) { // Query the regionInterface to get all successor regions of the current // one. SmallVector successorRegions; - regionInterface.getSuccessorRegions(region.getRegionNumber(), - operandAttributes, successorRegions); + getSuccessorRegions(regionInterface, region.getRegionNumber(), + successorRegions); // Try to find a matching region successor. RegionSuccessor *regionSuccessor = llvm::find_if(successorRegions, regionPredicate); @@ -498,7 +601,9 @@ class BufferPlacement { continue; // Get the operand index in the context of the current successor input // bindings. - auto operandIndex = operandProvider(*regionSuccessor); + size_t operandIndex = + llvm::find(regionSuccessor->getSuccessorInputs(), argValue) + .getIndex(); // Iterate over all immediate terminator operations to introduce // new buffer allocations. Thereby, the appropriate terminator operand @@ -518,6 +623,16 @@ class BufferPlacement { /// its content into the newly allocated buffer. The terminator operation is /// used to insert the alloc and copy operations at the right places. Value introduceBufferCopy(Value sourceValue, Operation *terminator) { + // Avoid multiple copies of the same source value. This can happen in the + // presence of loops when a branch acts as a backedge while also having + // another successor that returns to its parent operation. Note: that + // copying copied buffers can introduce memory leaks since the invariant of + // BufferPlacement assumes that a buffer will be only copied once into a + // temporary buffer. Hence, the construction of copy chains introduces + // additional allocations that are not tracked automatically by the + // algorithm. + if (copiedValues.contains(sourceValue)) + return sourceValue; // Create a new alloc at the current location of the terminator. 
auto memRefType = sourceValue.getType().cast(); OpBuilder builder(terminator); @@ -541,6 +656,8 @@ class BufferPlacement { // allocation to the new one. builder.create(terminator->getLoc(), sourceValue, alloc); + // Remember the copy of original source value. + copiedValues.insert(alloc); return alloc; } @@ -652,6 +769,9 @@ class BufferPlacement { /// Maps allocation nodes to their associated blocks. AllocEntryList allocs; + // Stores already copied allocations to avoid additional copies of copies. + ValueSetT copiedValues; + /// The underlying liveness analysis to compute fine grained information /// about alloc and dealloc positions. Liveness liveness; @@ -673,6 +793,14 @@ class BufferPlacement { struct BufferPlacementPass : BufferPlacementBase { void runOnFunction() override { + // Ensure that there are supported loops only. + Backedges backedges(getFunction()); + if (backedges.size()) { + getFunction().emitError( + "Structured control-flow loops are supported only."); + return; + } + // Place all required alloc, copy and dealloc nodes. BufferPlacement placement(getFunction()); placement.place(); diff --git a/mlir/test/Transforms/buffer-placement.mlir b/mlir/test/Transforms/buffer-placement.mlir index e1ed2c4309c3db..dc9ff44bf4838e 100644 --- a/mlir/test/Transforms/buffer-placement.mlir +++ b/mlir/test/Transforms/buffer-placement.mlir @@ -1125,3 +1125,295 @@ func @nestedRegionControlFlowAlloca( // CHECK: %[[ALLOCA:.*]] = alloca(%arg0, %arg1) // CHECK-NEXT: scf.yield %[[ALLOC0]] // CHECK: return %[[ALLOC1]] + +// ----- + +// Test Case: structured control-flow loop using a nested alloc. +// The alloc positions of %3 will not be changed, but the iteration argument +// %iterBuf has to be freed before yielding %3 to avoid memory leaks. + +// ----- + +// CHECK-LABEL: func @loop_alloc +func @loop_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = alloc() : memref<2xf32> + scf.yield %3 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]] +// CHECK: cmpi +// CHECK: dealloc %[[IALLOC]] +// CHECK: %[[ALLOC3:.*]] = alloc() +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK: dealloc %[[ALLOC3]] +// CHECK: scf.yield %[[ALLOC4]] +// CHECK: } +// CHECK: linalg.copy(%[[ALLOC2]], %arg4) +// CHECK-NEXT: dealloc %[[ALLOC2]] + +// ----- + +// Test Case: structured control-flow loop with a nested if operation. +// The loop yields buffers that have been defined outside of the loop and the +// backeges only use the iteration arguments (or one of its aliases). +// Therefore, we do not have to (and are not allowed to) free any buffers +// that are passed via the backedges. 
+ +// CHECK-LABEL: func @loop_nested_if_no_alloc +func @loop_nested_if_no_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = scf.if %2 -> (memref<2xf32>) { + scf.yield %0 : memref<2xf32> + } else { + scf.yield %iterBuf : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC1:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = +// CHECK: %[[ALLOC2:.*]] = scf.if +// CHECK: scf.yield %[[ALLOC0]] +// CHECK: scf.yield %[[IALLOC]] +// CHECK: scf.yield %[[ALLOC2]] +// CHECK: linalg.copy(%[[ALLOC1]], %arg4) +// CHECK: dealloc %[[ALLOC0]] + +// ----- + +// Test Case: structured control-flow loop with a nested if operation using +// a deeply nested buffer allocation. +// Since the innermost allocation happens in a divergent branch, we have to +// introduce additional copies for the nested if operation. Since the loop's +// yield operation "returns" %3, it will return a newly allocated buffer. +// Therefore, we have to free the iteration argument %iterBuf before +// "returning" %3. + +// CHECK-LABEL: func @loop_nested_if_alloc +func @loop_nested_if_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>) -> memref<2xf32> { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = cmpi "eq", %i, %ub : index + %3 = scf.if %2 -> (memref<2xf32>) { + %4 = alloc() : memref<2xf32> + scf.yield %4 : memref<2xf32> + } else { + scf.yield %0 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + return %1 : memref<2xf32> +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[ALLOC2:.*]] = scf.for {{.*}} iter_args(%[[IALLOC:.*]] = %[[ALLOC1]] +// CHECK: dealloc %[[IALLOC]] +// CHECK: %[[ALLOC3:.*]] = scf.if + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC4]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[ALLOC4]] +// CHECK-NEXT: scf.yield %[[ALLOC5]] + +// CHECK: %[[ALLOC6:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC0]], %[[ALLOC6]]) +// CHECK-NEXT: scf.yield %[[ALLOC6]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3:.*]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: dealloc %[[ALLOC0]] +// CHECK-NEXT: return %[[ALLOC2]] + +// ----- + +// Test Case: several nested structured control-flow loops with a deeply nested +// buffer allocation inside an if operation. +// Same behavior is an loop_nested_if_alloc: we have to insert deallocations +// before each yield in all loops recursively. 
+ +// CHECK-LABEL: func @loop_nested_alloc +func @loop_nested_alloc( + %lb: index, + %ub: index, + %step: index, + %buf: memref<2xf32>, + %res: memref<2xf32>) { + %0 = alloc() : memref<2xf32> + %1 = scf.for %i = %lb to %ub step %step + iter_args(%iterBuf = %buf) -> memref<2xf32> { + %2 = scf.for %i2 = %lb to %ub step %step + iter_args(%iterBuf2 = %iterBuf) -> memref<2xf32> { + %3 = scf.for %i3 = %lb to %ub step %step + iter_args(%iterBuf3 = %iterBuf2) -> memref<2xf32> { + %4 = alloc() : memref<2xf32> + %5 = cmpi "eq", %i, %ub : index + %6 = scf.if %5 -> (memref<2xf32>) { + %7 = alloc() : memref<2xf32> + scf.yield %7 : memref<2xf32> + } else { + scf.yield %iterBuf3 : memref<2xf32> + } + scf.yield %6 : memref<2xf32> + } + scf.yield %3 : memref<2xf32> + } + scf.yield %2 : memref<2xf32> + } + "linalg.copy"(%1, %res) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// CHECK: %[[ALLOC0:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC0]] +// CHECK-NEXT: %[[ALLOC1:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%arg3, %[[ALLOC1]]) +// CHECK-NEXT: %[[VAL_7:.*]] = scf.for {{.*}} iter_args(%[[IALLOC0:.*]] = %[[ALLOC1]]) +// CHECK: %[[ALLOC2:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC0]], %[[ALLOC2]]) +// CHECK-NEXT: dealloc %[[IALLOC0]] +// CHECK-NEXT: %[[ALLOC3:.*]] = scf.for {{.*}} iter_args(%[[IALLOC1:.*]] = %[[ALLOC2]]) +// CHECK: %[[ALLOC5:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC1]], %[[ALLOC5]]) +// CHECK-NEXT: dealloc %[[IALLOC1]] + +// CHECK: %[[ALLOC6:.*]] = scf.for {{.*}} iter_args(%[[IALLOC2:.*]] = %[[ALLOC5]]) +// CHECK: %[[ALLOC8:.*]] = alloc() +// CHECK-NEXT: dealloc %[[ALLOC8]] +// CHECK: %[[ALLOC9:.*]] = scf.if + +// CHECK: %[[ALLOC11:.*]] = alloc() +// CHECK-NEXT: %[[ALLOC12:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC11]], %[[ALLOC12]]) +// CHECK-NEXT: dealloc %[[ALLOC11]] +// CHECK-NEXT: scf.yield %[[ALLOC12]] + +// CHECK: %[[ALLOC13:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[IALLOC2]], %[[ALLOC13]]) +// CHECK-NEXT: scf.yield %[[ALLOC13]] + +// CHECK: dealloc %[[IALLOC2]] +// CHECK-NEXT: %[[ALLOC10:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC9]], %[[ALLOC10]]) +// CHECK-NEXT: dealloc %[[ALLOC9]] +// CHECK-NEXT: scf.yield %[[ALLOC10]] + +// CHECK: %[[ALLOC7:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC6]], %[[ALLOC7]]) +// CHECK-NEXT: dealloc %[[ALLOC6]] +// CHECK-NEXT: scf.yield %[[ALLOC7]] + +// CHECK: %[[ALLOC4:.*]] = alloc() +// CHECK-NEXT: linalg.copy(%[[ALLOC3]], %[[ALLOC4]]) +// CHECK-NEXT: dealloc %[[ALLOC3]] +// CHECK-NEXT: scf.yield %[[ALLOC4]] + +// CHECK: linalg.copy(%[[VAL_7]], %arg4) +// CHECK-NEXT: dealloc %[[VAL_7]] + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. 
+ +// CHECK-LABEL: func @loop_dynalloc +func @loop_dynalloc( + %arg0 : i32, + %arg1 : i32, + %arg2: memref, + %arg3: memref) { + %const0 = constant 0 : i32 + br ^loopHeader(%const0, %arg2 : i32, memref) + +^loopHeader(%i : i32, %buff : memref): + %lessThan = cmpi "slt", %i, %arg1 : i32 + cond_br %lessThan, + ^loopBody(%i, %buff : i32, memref), + ^exit(%buff : memref) + +^loopBody(%val : i32, %buff2: memref): + %const1 = constant 1 : i32 + %inc = addi %val, %const1 : i32 + %size = std.index_cast %inc : i32 to index + %alloc1 = alloc(%size) : memref + br ^loopHeader(%inc, %alloc1 : i32, memref) + +^exit(%buff3 : memref): + "linalg.copy"(%buff3, %arg3) : (memref, memref) -> () + return +} + +// expected-error@+1 {{Structured control-flow loops are supported only}} + +// ----- + +// Test Case: explicit control-flow loop with a dynamically allocated buffer. +// The BufferPlacement transformation should fail on this explicit +// control-flow loop since they are not supported. + +// CHECK-LABEL: func @do_loop_alloc +func @do_loop_alloc( + %arg0 : i32, + %arg1 : i32, + %arg2: memref<2xf32>, + %arg3: memref<2xf32>) { + %const0 = constant 0 : i32 + br ^loopBody(%const0, %arg2 : i32, memref<2xf32>) + +^loopBody(%val : i32, %buff2: memref<2xf32>): + %const1 = constant 1 : i32 + %inc = addi %val, %const1 : i32 + %alloc1 = alloc() : memref<2xf32> + br ^loopHeader(%inc, %alloc1 : i32, memref<2xf32>) + +^loopHeader(%i : i32, %buff : memref<2xf32>): + %lessThan = cmpi "slt", %i, %arg1 : i32 + cond_br %lessThan, + ^loopBody(%i, %buff : i32, memref<2xf32>), + ^exit(%buff : memref<2xf32>) + +^exit(%buff3 : memref<2xf32>): + "linalg.copy"(%buff3, %arg3) : (memref<2xf32>, memref<2xf32>) -> () + return +} + +// expected-error@+1 {{Structured control-flow loops are supported only}} From 8427885e27813c457dccb011f65e8ded74444e31 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 9 Sep 2020 12:08:46 +0300 Subject: [PATCH 161/161] Temporairly revert "Thread safety analysis: Consider global variables in scope" & followup This appears to cause false-positives because it started to warn on local non-global variables. Repro posted to https://reviews.llvm.org/D84604#2262745 This reverts commit 9dcc82f34ea9b623d82d2577b93aaf67d36dabd2. This reverts commit b2ce79ef66157dd752e3864ece57915e23a73f5d. --- clang/lib/Analysis/ThreadSafety.cpp | 18 ++++-------- clang/lib/Analysis/ThreadSafetyCommon.cpp | 2 +- .../SemaCXX/warn-thread-safety-analysis.cpp | 7 ++--- .../SemaCXX/warn-thread-safety-negative.cpp | 29 ------------------- 4 files changed, 9 insertions(+), 47 deletions(-) diff --git a/clang/lib/Analysis/ThreadSafety.cpp b/clang/lib/Analysis/ThreadSafety.cpp index 5b97265a6d8ae5..64e0da9e64b122 100644 --- a/clang/lib/Analysis/ThreadSafety.cpp +++ b/clang/lib/Analysis/ThreadSafety.cpp @@ -1266,21 +1266,13 @@ ClassifyDiagnostic(const AttrTy *A) { } bool ThreadSafetyAnalyzer::inCurrentScope(const CapabilityExpr &CapE) { - const threadSafety::til::SExpr *SExp = CapE.sexpr(); - assert(SExp && "Null expressions should be ignored"); - - // Global variables are always in scope. - if (isa(SExp)) - return true; - - // Members are in scope from methods of the same class. 
- if (const auto *P = dyn_cast(SExp)) { - if (!CurrentMethod) + if (!CurrentMethod) return false; - const ValueDecl *VD = P->clangDecl(); - return VD->getDeclContext() == CurrentMethod->getDeclContext(); + if (const auto *P = dyn_cast_or_null(CapE.sexpr())) { + const auto *VD = P->clangDecl(); + if (VD) + return VD->getDeclContext() == CurrentMethod->getDeclContext(); } - return false; } diff --git a/clang/lib/Analysis/ThreadSafetyCommon.cpp b/clang/lib/Analysis/ThreadSafetyCommon.cpp index aee91857600717..1b8c55e56d4704 100644 --- a/clang/lib/Analysis/ThreadSafetyCommon.cpp +++ b/clang/lib/Analysis/ThreadSafetyCommon.cpp @@ -274,7 +274,7 @@ til::SExpr *SExprBuilder::translateDeclRefExpr(const DeclRefExpr *DRE, const auto *VD = cast(DRE->getDecl()->getCanonicalDecl()); // Function parameters require substitution and/or renaming. - if (const auto *PV = dyn_cast(VD)) { + if (const auto *PV = dyn_cast_or_null(VD)) { unsigned I = PV->getFunctionScopeIndex(); const DeclContext *D = PV->getDeclContext(); if (Ctx && Ctx->FunArgs) { diff --git a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp index d1520b1decbd32..91bd15def577dd 100644 --- a/clang/test/SemaCXX/warn-thread-safety-analysis.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-analysis.cpp @@ -5036,8 +5036,7 @@ void spawn_fake_flight_control_thread(void) { } extern const char *deque_log_msg(void) __attribute__((requires_capability(Logger))); -void logger_entry(void) __attribute__((requires_capability(Logger))) - __attribute__((requires_capability(!FlightControl))) { +void logger_entry(void) __attribute__((requires_capability(Logger))) { const char *msg; while ((msg = deque_log_msg())) { @@ -5045,13 +5044,13 @@ void logger_entry(void) __attribute__((requires_capability(Logger))) } } -void spawn_fake_logger_thread(void) __attribute__((requires_capability(!FlightControl))) { +void spawn_fake_logger_thread(void) { acquire(Logger); logger_entry(); release(Logger); } -int main(void) __attribute__((requires_capability(!FlightControl))) { +int main(void) { spawn_fake_flight_control_thread(); spawn_fake_logger_thread(); diff --git a/clang/test/SemaCXX/warn-thread-safety-negative.cpp b/clang/test/SemaCXX/warn-thread-safety-negative.cpp index 68e30f4a3225bd..456fe16e6574e4 100644 --- a/clang/test/SemaCXX/warn-thread-safety-negative.cpp +++ b/clang/test/SemaCXX/warn-thread-safety-negative.cpp @@ -81,35 +81,6 @@ class Foo { } // end namespace SimpleTest -Mutex globalMutex; - -namespace ScopeTest { - -void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); -void fq() EXCLUSIVE_LOCKS_REQUIRED(!::globalMutex); - -namespace ns { - Mutex globalMutex; - void f() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex); - void fq() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex); -} - -void testGlobals() EXCLUSIVE_LOCKS_REQUIRED(!ns::globalMutex) { - f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} - ns::f(); - ns::fq(); -} - -void testNamespaceGlobals() EXCLUSIVE_LOCKS_REQUIRED(!globalMutex) { - f(); - fq(); - ns::f(); // expected-warning {{calling function 'f' requires negative capability '!globalMutex'}} - ns::fq(); // expected-warning {{calling function 'fq' requires negative capability '!globalMutex'}} -} - -} // end namespace ScopeTest - namespace DoubleAttribute { struct Foo {