From 496f8e5b369f091def93482578232da8c6e77a7a Mon Sep 17 00:00:00 2001
From: Hamilton Tobon Mosquera
Date: Mon, 17 Aug 2020 20:18:21 -0500
Subject: [PATCH 001/101] [OpenMPOpt][HideMemTransfersLatency] Split
 __tgt_target_data_begin_mapper into its "issue" and "wait" counterparts.

WIP that tries to hide the latency of runtime calls that involve host to
device memory transfers by splitting them into their "issue" and "wait"
versions. The "issue" is moved upwards as much as possible. The "wait" is
moved downwards as much as possible. The "issue" issues the memory transfer
asynchronously, returning a handle. The "wait" waits on the returned handle
for the memory transfer to finish. The actual code motion of the "issue" and
"wait" calls is not implemented yet.
---
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |  4 ++
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         | 66 +++++++++++++++++++
 .../OpenMP/hide_mem_transfer_latency.ll       | 54 ++++++---------
 3 files changed, 90 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 3fc87dc34cd346..9ad7efff6ef567 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -198,6 +198,7 @@ __OMP_ARRAY_TYPE(KmpCriticalName, Int32, 8)
   OMP_STRUCT_TYPE(VarName, "struct." #Name, __VA_ARGS__)
 
 __OMP_STRUCT_TYPE(Ident, ident_t, Int32, Int32, Int32, Int32, Int8Ptr)
+__OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, Int8Ptr)
 
 #undef __OMP_STRUCT_TYPE
 #undef OMP_STRUCT_TYPE
@@ -482,6 +483,9 @@ __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, Int64, Int32,
           VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_issue, false, AsyncInfo, Int64, Int32,
+          VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
+__OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfo)
 __OMP_RTL(__tgt_target_data_end_mapper, false, Void, Int64, Int32, VoidPtrPtr,
           VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr)
 __OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, Int64, Int32,
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 93f1e5392eb2cc..ae7bafd7d91e5c 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -42,6 +42,13 @@ static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
 static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                         cl::init(false), cl::Hidden);
 
+static cl::opt<bool> HideMemoryTransferLatency(
+    "openmp-hide-memory-transfer-latency",
+    cl::desc("[WIP] Tries to hide the latency of host to device memory"
+             " transfers"),
+    cl::Hidden, cl::init(false));
+
+
 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
           "Number of OpenMP runtime calls deduplicated");
 STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -508,6 +515,8 @@ struct OpenMPOpt {
     Changed |= deduplicateRuntimeCalls();
     Changed |= deleteParallelRegions();
+    if (HideMemoryTransferLatency)
+      Changed |= hideMemTransfersLatency();
 
     return Changed;
   }
@@ -666,6 +675,63 @@ struct OpenMPOpt {
     return Changed;
   }
 
+  /// Tries to hide the latency of runtime calls that involve host to
+  /// device memory transfers by splitting them into their "issue" and "wait"
+  /// versions. The "issue" is moved upwards as much as possible. The "wait" is
+  /// moved downwards as much as possible. The "issue" issues the memory
+  /// transfer asynchronously, returning a handle. The "wait" waits on the
+  /// returned handle for the memory transfer to finish.
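+  /// For example (mirroring the IR checked in the accompanying test), a
+  /// synchronous transfer such as
+  ///   call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, ...)
+  /// is rewritten into
+  ///   %handle = call %struct.__tgt_async_info
+  ///       @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, ...)
+  ///   call void @__tgt_target_data_begin_mapper_wait(i64 -1,
+  ///       %struct.__tgt_async_info %handle)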
+  bool hideMemTransfersLatency() {
+    auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
+    bool Changed = false;
+    auto SplitMemTransfers = [&](Use &U, Function &Decl) {
+      auto *RTCall = getCallIfRegularCall(U, &RFI);
+      if (!RTCall)
+        return false;
+
+      bool WasSplit = splitTargetDataBeginRTC(RTCall);
+      Changed |= WasSplit;
+      return WasSplit;
+    };
+    RFI.foreachUse(SCC, SplitMemTransfers);
+
+    return Changed;
+  }
+
+  /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
+  bool splitTargetDataBeginRTC(CallInst *RuntimeCall) {
+    auto &IRBuilder = OMPInfoCache.OMPBuilder;
+    // Add the "issue" runtime call declaration:
+    // declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(
+    //     i64, i32, i8**, i8**, i64*, i64*, i8**)
+    FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
+        M, OMPRTL___tgt_target_data_begin_mapper_issue);
+
+    // Replace the RuntimeCall call site with its asynchronous version.
+    SmallVector<Value *, 16> Args;
+    for (auto &Arg : RuntimeCall->args())
+      Args.push_back(Arg.get());
+
+    CallInst *IssueCallsite =
+        CallInst::Create(IssueDecl, Args, "handle", RuntimeCall);
+    RuntimeCall->eraseFromParent();
+
+    // Add the "wait" runtime call declaration:
+    // declare void @__tgt_target_data_begin_mapper_wait(i64,
+    //     %struct.__tgt_async_info)
+    FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
+        M, OMPRTL___tgt_target_data_begin_mapper_wait);
+
+    // Add a call site to WaitDecl that waits on the handle returned by the
+    // "issue" call.
+    Value *WaitParams[2] = {
+        IssueCallsite->getArgOperand(0), // device_id.
+        IssueCallsite                    // returned handle.
+    };
+    CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"",
+                     IssueCallsite->getNextNode());
+
+    return true;
+  }
+
   static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                     bool GlobalOnly, bool &SingleChoice) {
     if (CurrentIdent == NextIdent)
diff --git a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
index daebe4b52ace5a..7f55ad12af2d71 100644
--- a/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
+++ b/llvm/test/Transforms/OpenMP/hide_mem_transfer_latency.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature --scrub-attributes
-; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa < %s | FileCheck %s
+; RUN: opt -S -passes=openmpopt -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 
-; FIXME: This struct should be generated after splitting at least one of the runtime calls.
-; %struct.__tgt_async_info = type { i8* } +; CHECK: %struct.__tgt_async_info = type { i8* } %struct.ident_t = type { i32, i32, i32, i32, i8* } %struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 } @@ -58,7 +57,10 @@ define dso_local double @heavyComputation1() { ; CHECK-NEXT: %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 ; CHECK-NEXT: %4 = bitcast [1 x i8*]* %.offload_ptrs to double** ; CHECK-NEXT: store double* %a, double** %4, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %5 = bitcast double* %a to i64* ; CHECK-NEXT: %6 = load i64, i64* %5, align 8 ; CHECK-NEXT: %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0 @@ -102,11 +104,6 @@ entry: %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0 %4 = bitcast [1 x i8*]* %.offload_ptrs to double** store double* %a, double** %4, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) 
- ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null) %5 = bitcast double* %a to i64* @@ -186,7 +183,10 @@ define dso_local i32 @heavyComputation2(double* %a, i32 %size) { ; CHECK-NEXT: store i32* %size.addr, i32** %9, align 8 ; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 ; CHECK-NEXT: store i64 4, i64* %10, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4 ; CHECK-NEXT: %size.casted = zext i32 %11 to i64 ; CHECK-NEXT: %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0 @@ -241,12 +241,6 @@ entry: store i32* %size.addr, i32** %9, align 8 %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 store i64 4, i64* %10, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. Here though, the "issue" cannot be moved upwards - ; because it's not guaranteed that rand() won't modify *a. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) 
- ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) %11 = load i32, i32* %size.addr, align 4 @@ -330,7 +324,10 @@ define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) { ; CHECK-NEXT: store i32* %size.addr, i32** %9, align 8 ; CHECK-NEXT: %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 ; CHECK-NEXT: store i64 4, i64* %10, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 2, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %11 = load i32, i32* %size.addr, align 4 ; CHECK-NEXT: %size.casted = zext i32 %11 to i64 ; CHECK-NEXT: %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0 @@ -386,11 +383,6 @@ entry: store i32* %size.addr, i32** %9, align 8 %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1 store i64 4, i64* %10, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) 
- ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null) %11 = load i32, i32* %size.addr, align 4 @@ -459,7 +451,10 @@ define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) { ; CHECK-NEXT: store double* %a, double** %4, align 8 ; CHECK-NEXT: %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0 ; CHECK-NEXT: store i64 %0, i64* %5, align 8 -; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) + +; CHECK-NEXT: %handle = call %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64 -1, i32 1, i8** %1, i8** %3, i64* %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) +; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info %handle) + ; CHECK-NEXT: %rem = urem i32 %call, %size ; CHECK-NEXT: call void @__tgt_target_data_end_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) ; CHECK-NEXT: ret i32 %rem @@ -482,13 +477,6 @@ entry: store double* %a, double** %4, align 8 %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0 store i64 %0, i64* %5, align 8 - ; FIXME: This setup for the runtime call __tgt_target_data_begin_mapper should be - ; split into its "issue" and "wait" counterpars and moved upwards - ; and downwards, respectively. Here though, the "wait" cannot be moved downwards - ; because it is not worthit. That is, there is no store nor call to be hoisted - ; over. - ; %handle = call i8* @__tgt_target_data_begin_mapper_issue(...) - ; call void @__tgt_target_data_begin_wait(i64 -1, %struct.__tgt_async_info %handle) call void @__tgt_target_data_begin_mapper(i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null) %rem = urem i32 %call, %size @@ -503,7 +491,5 @@ declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8* declare dso_local i32 @rand(...) -; FIXME: These two function declarations must be generated after splitting the runtime function -; __tgt_target_data_begin_mapper. -; declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**) -; declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info) +; CHECK: declare %struct.__tgt_async_info @__tgt_target_data_begin_mapper_issue(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info) From 2af4c2b2b1be0333a14fbf82d9e31f62d0f3106c Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 17 Aug 2020 17:48:04 -0700 Subject: [PATCH 002/101] [NewPM] Pin various tests under Other/ to legacy PM These all are legacy PM-specific or have a corresponding NPM RUN line. 
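
For example, a RUN line such as

    ; RUN: opt -O2 -debug-pass=Structure %s 2>&1 | FileCheck %s

is pinned to the legacy pass manager as

    ; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure %s 2>&1 | FileCheck %s

(The exact flags vary per test; see the diffs below.)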
Reviewed By: ychen Differential Revision: https://reviews.llvm.org/D86124 --- llvm/test/Other/cspgo-O2-pipeline.ll | 4 ++-- llvm/test/Other/loop-pass-printer.ll | 6 +++--- llvm/test/Other/opt-O0-pipeline-enable-matrix.ll | 2 +- llvm/test/Other/opt-O0-pipeline.ll | 2 +- llvm/test/Other/opt-O2-pipeline.ll | 2 +- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll | 2 +- llvm/test/Other/opt-O3-pipeline.ll | 2 +- llvm/test/Other/opt-Os-pipeline.ll | 2 +- llvm/test/Other/opt-pipeline-vector-passes.ll | 10 +++++----- llvm/test/Other/optimize-options.ll | 10 +++++----- llvm/test/Other/pass-pipelines.ll | 6 +++--- llvm/test/Other/print-cfg-sccs.ll | 2 +- llvm/test/Other/print-module-scope.ll | 4 ++-- llvm/test/Other/printer.ll | 2 +- llvm/test/Other/time-passes.ll | 6 +++--- 15 files changed, 31 insertions(+), 31 deletions(-) diff --git a/llvm/test/Other/cspgo-O2-pipeline.ll b/llvm/test/Other/cspgo-O2-pipeline.ll index 974213c83c8128..26f2e338cbc8b7 100644 --- a/llvm/test/Other/cspgo-O2-pipeline.ll +++ b/llvm/test/Other/cspgo-O2-pipeline.ll @@ -1,13 +1,13 @@ ; Test CSGen pass in CSPGO. ; RUN: llvm-profdata merge %S/Inputs/cspgo-noncs.proftext -o %t-noncs.profdata ; RUN: llvm-profdata merge %S/Inputs/cspgo-cs.proftext -o %t-cs.profdata -; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT ; CSGENDEFAULT: PGOInstrumentationUse ; CSGENDEFAULT: PGOInstrumentationGenCreateVar ; CSGENDEFAULT: PGOInstrumentationGen ; Test CSUse pass in CSPGO. -; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT ; CSUSEDEFAULT: PGOInstrumentationUse ; CSUSEDEFAULT-NOT: PGOInstrumentationGenCreateVar ; CSUSEDEFAULT: PGOInstrumentationUse diff --git a/llvm/test/Other/loop-pass-printer.ll b/llvm/test/Other/loop-pass-printer.ll index aab4dc91573a5f..c74d202f262158 100644 --- a/llvm/test/Other/loop-pass-printer.ll +++ b/llvm/test/Other/loop-pass-printer.ll @@ -1,19 +1,19 @@ ; This test checks -print-after/before on loop passes ; Besides of the loop itself it should be dumping loop pre-header and exits. 
; -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-deletion -print-before=loop-deletion \ ; RUN: | FileCheck %s -check-prefix=DEL ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes='loop(loop-deletion)' -print-before-all \ ; RUN: | FileCheck %s -check-prefix=DEL -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-unroll -print-after=loop-unroll -filter-print-funcs=bar \ ; RUN: | FileCheck %s -check-prefix=BAR -check-prefix=BAR-OLD ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes='require,loop(loop-unroll-full)' -print-after-all -filter-print-funcs=bar \ ; RUN: | FileCheck %s -check-prefix=BAR -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -loop-unroll -print-after=loop-unroll -filter-print-funcs=foo -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=FOO-MODULE -check-prefix=FOO-MODULE-OLD ; RUN: opt < %s 2>&1 -disable-output \ diff --git a/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll index 401cbb976a4164..f754f6d1a513eb 100644 --- a/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O0-pipeline-enable-matrix.ll @@ -1,4 +1,4 @@ -; RUN: opt -O0 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -O0 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O0-pipeline.ll b/llvm/test/Other/opt-O0-pipeline.ll index ce431a502f93cb..6900b88cbb4ebf 100644 --- a/llvm/test/Other/opt-O0-pipeline.ll +++ b/llvm/test/Other/opt-O0-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK,%llvmcheckext +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK,%llvmcheckext ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll index 56f85d0fb9a8c1..e606e7cfac1716 100644 --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll index a0b7a8f5e1e3d7..aaee6f786bac91 100644 --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -1,4 +1,4 @@ -; RUN: opt -O3 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -O3 -enable-matrix -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll index 942f7d9dfead59..b2d2f85ae21be2 100644 --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck 
--check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll index d975cc48b629c3..cc91707c4b009d 100644 --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=x86_64-- -Os -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s +; RUN: opt -enable-new-pm=0 -mtriple=x86_64-- -Os -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck --check-prefixes=CHECK,%llvmcheckext %s ; REQUIRES: asserts diff --git a/llvm/test/Other/opt-pipeline-vector-passes.ll b/llvm/test/Other/opt-pipeline-vector-passes.ll index c9966d43e49126..5a76bfed168542 100644 --- a/llvm/test/Other/opt-pipeline-vector-passes.ll +++ b/llvm/test/Other/opt-pipeline-vector-passes.ll @@ -1,8 +1,8 @@ -; RUN: opt -O1 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1 -; RUN: opt -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2 -; RUN: opt -O2 -extra-vectorizer-passes -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_EXTRA -; RUN: opt -O1 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1_FORCE_OFF -; RUN: opt -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF +; RUN: opt -enable-new-pm=0 -O1 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1 +; RUN: opt -enable-new-pm=0 -O2 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2 +; RUN: opt -enable-new-pm=0 -O2 -extra-vectorizer-passes -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_EXTRA +; RUN: opt -enable-new-pm=0 -O1 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O1_FORCE_OFF +; RUN: opt -enable-new-pm=0 -O2 -vectorize-loops=0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=OLDPM_O2_FORCE_OFF ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O1 ; RUN: opt -disable-verify -debug-pass-manager -passes='default' -S %s 2>&1 | FileCheck %s --check-prefixes=NEWPM_O2 diff --git a/llvm/test/Other/optimize-options.ll b/llvm/test/Other/optimize-options.ll index 22dd842cab0696..ab2fc8f75b73bf 100644 --- a/llvm/test/Other/optimize-options.ll +++ b/llvm/test/Other/optimize-options.ll @@ -1,8 +1,8 @@ -;RUN: opt -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s -;RUN: opt -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O1 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O2 -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -Os -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -Oz -debug-pass=Arguments %s 2>&1 | FileCheck %s +;RUN: opt -enable-new-pm=0 -S -O3 -debug-pass=Arguments %s 2>&1 | FileCheck %s ; Just check that we get a non-empty set of passes for each -O option. 
;CHECK: Pass Arguments: {{.*}} -print-module diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll index 620325ec1d5ee6..ccd364d5d74044 100644 --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -3,15 +3,15 @@ ; legacy pass manager doesn't introduce unexpected structural changes in the ; pass pipeline. ; -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -O2 %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O2 ; RUN: llvm-profdata merge %S/Inputs/pass-pipelines.proftext -o %t.profdata -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -O2 %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE -; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: opt -enable-new-pm=0 -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -hot-cold-split \ ; RUN: -O2 %s 2>&1 \ diff --git a/llvm/test/Other/print-cfg-sccs.ll b/llvm/test/Other/print-cfg-sccs.ll index 43e885476bca81..6162b2d38fed5f 100644 --- a/llvm/test/Other/print-cfg-sccs.ll +++ b/llvm/test/Other/print-cfg-sccs.ll @@ -1,4 +1,4 @@ -; RUN: opt -print-cfg-sccs -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -enable-new-pm=0 -print-cfg-sccs -disable-output < %s 2>&1 | FileCheck %s ; CHECK: SCCs for Function test in PostOrder: ; CHECK-NEXT: SCC #1 : %exit, diff --git a/llvm/test/Other/print-module-scope.ll b/llvm/test/Other/print-module-scope.ll index 54e087ff29d25f..08d6bbb3a28b0a 100644 --- a/llvm/test/Other/print-module-scope.ll +++ b/llvm/test/Other/print-module-scope.ll @@ -3,13 +3,13 @@ ; - all the function attributes are shown, including those of declarations ; - works on top of -print-after and -filter-print-funcs ; -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -simplifycfg -print-after=simplifycfg -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=CFG ; RUN: opt < %s 2>&1 -disable-output \ ; RUN: -passes=simplify-cfg -print-after-all -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=CFG -; RUN: opt < %s 2>&1 -disable-output \ +; RUN: opt -enable-new-pm=0 < %s 2>&1 -disable-output \ ; RUN: -simplifycfg -print-after=simplifycfg -filter-print-funcs=foo -print-module-scope \ ; RUN: | FileCheck %s -check-prefix=FOO ; RUN: opt < %s 2>&1 -disable-output \ diff --git a/llvm/test/Other/printer.ll b/llvm/test/Other/printer.ll index 86337656285500..f5fdbfc1d7099c 100644 --- a/llvm/test/Other/printer.ll +++ b/llvm/test/Other/printer.ll @@ -1,4 +1,4 @@ -; RUN: opt -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ +; RUN: opt -enable-new-pm=0 -mem2reg -instcombine -print-after-all -disable-output < %s 2>&1 | \ ; RUN: FileCheck --check-prefixes=CHECK,OLDPM %s --implicit-check-not='IR Dump' ; RUN: opt -passes='mem2reg,instcombine' -print-after-all -disable-output < %s 2>&1 | \ ; RUN: FileCheck --check-prefixes=CHECK,NEWPM %s --implicit-check-not='IR Dump' diff --git a/llvm/test/Other/time-passes.ll b/llvm/test/Other/time-passes.ll index 743b4ebc0d6d1a..e3b5a003703079 100644 --- a/llvm/test/Other/time-passes.ll +++ b/llvm/test/Other/time-passes.ll @@ -1,11 +1,11 @@ -; RUN: opt < %s 
-disable-output -instcombine -instcombine -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY -; RUN: opt < %s -disable-output -instcombine -instcombine -licm -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY --check-prefix=TIME-DOUBLE-LICM-LEGACY +; RUN: opt -enable-new-pm=0 < %s -disable-output -instcombine -instcombine -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY +; RUN: opt -enable-new-pm=0 < %s -disable-output -instcombine -instcombine -licm -licm -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-LEGACY --check-prefix=TIME-DOUBLE-LICM-LEGACY ; RUN: opt < %s -disable-output -passes='instcombine,instcombine,loop(licm)' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-NEW ; RUN: opt < %s -disable-output -passes='instcombine,loop(licm),instcombine,loop(licm)' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME --check-prefix=TIME-NEW -check-prefix=TIME-DOUBLE-LICM-NEW ; RUN: opt < %s -disable-output -passes='default' -time-passes 2>&1 | FileCheck %s --check-prefix=TIME ; ; The following 4 test runs verify -info-output-file interaction (default goes to stderr, '-' goes to stdout). -; RUN: opt < %s -disable-output -O2 -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME +; RUN: opt -enable-new-pm=0 < %s -disable-output -O2 -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME ; RUN: opt < %s -disable-output -passes='default' -time-passes -info-output-file='-' 2>/dev/null | FileCheck %s --check-prefix=TIME ; ; RUN: rm -f %t; opt < %s -disable-output -O2 -time-passes -info-output-file=%t From c7ec3a7e338cd8e58424a66d29162e9b6a5847f7 Mon Sep 17 00:00:00 2001 From: Amy Kwan Date: Wed, 12 Aug 2020 09:23:05 -0500 Subject: [PATCH 003/101] [PowerPC] Implement Vector Extract Mask builtins in LLVM/Clang This patch implements the vec_extractm function prototypes in altivec.h in order to utilize the vector extract with mask instructions introduced in Power10. Differential Revision: https://reviews.llvm.org/D82675 --- clang/include/clang/Basic/BuiltinsPPC.def | 7 ++ clang/lib/Headers/altivec.h | 28 ++++++++ clang/test/CodeGen/builtins-ppc-p10vector.c | 30 +++++++++ llvm/include/llvm/IR/IntrinsicsPowerPC.td | 12 ++++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 15 +++-- .../CodeGen/PowerPC/p10-vector-mask-ops.ll | 66 +++++++++++++++++++ 6 files changed, 153 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index b79ed41284ace8..73c60780041572 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -298,6 +298,13 @@ BUILTIN(__builtin_altivec_vrldmi, "V2ULLiV2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "") +// P10 Vector Extract with Mask built-ins. +BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "") +BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "") +BUILTIN(__builtin_altivec_vextractwm, "UiV4Ui", "") +BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "") +BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "") + // P10 Vector Parallel Bits built-ins. 
BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "") BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index ac4182613cdda3..b1e70f6c41bbc6 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16815,6 +16815,34 @@ static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) { } #ifdef __POWER10_VECTOR__ + +/* vec_extractm */ + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned char __a) { + return __builtin_altivec_vextractbm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned short __a) { + return __builtin_altivec_vextracthm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned int __a) { + return __builtin_altivec_vextractwm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned long long __a) { + return __builtin_altivec_vextractdm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned __int128 __a) { + return __builtin_altivec_vextractqm(__a); +} + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c index a575f5a924c5e6..fe3e678a579485 100644 --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -97,6 +97,36 @@ vector unsigned long long test_vpextd(void) { return vec_pext(vulla, vullb); } +unsigned int test_vec_extractm_uc(void) { + // CHECK: @llvm.ppc.altivec.vextractbm(<16 x i8> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vuca); +} + +unsigned int test_vec_extractm_us(void) { + // CHECK: @llvm.ppc.altivec.vextracthm(<8 x i16> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vusa); +} + +unsigned int test_vec_extractm_ui(void) { + // CHECK: @llvm.ppc.altivec.vextractwm(<4 x i32> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vuia); +} + +unsigned int test_vec_extractm_ull(void) { + // CHECK: @llvm.ppc.altivec.vextractdm(<2 x i64> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vulla); +} + +unsigned int test_vec_extractm_u128(void) { + // CHECK: @llvm.ppc.altivec.vextractqm(<1 x i128> %{{.+}}) + // CHECK-NEXT: ret i32 + return vec_extractm(vui128a); +} + vector unsigned long long test_vcfuged(void) { // CHECK: @llvm.ppc.altivec.vcfuged(<2 x i64> // CHECK-NEXT: ret <2 x i64> diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index ae25bb400e463f..ce4c98968a7b70 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -434,6 +434,18 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">, Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>; + // P10 Vector Extract with Mask + def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">, + Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">, + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">, + Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">, + Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>; + // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins. def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index e86e7828c075a8..5bd1632475162c 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -965,19 +965,24 @@ let Predicates = [IsISA3_1] in { RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">; def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB), "vextractbm $rD, $vB", IIC_VecGeneral, - []>; + [(set i32:$rD, + (int_ppc_altivec_vextractbm v16i8:$vB))]>; def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB), "vextracthm $rD, $vB", IIC_VecGeneral, - []>; + [(set i32:$rD, + (int_ppc_altivec_vextracthm v8i16:$vB))]>; def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB), "vextractwm $rD, $vB", IIC_VecGeneral, - []>; + [(set i32:$rD, + (int_ppc_altivec_vextractwm v4i32:$vB))]>; def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB), "vextractdm $rD, $vB", IIC_VecGeneral, - []>; + [(set i32:$rD, + (int_ppc_altivec_vextractdm v2i64:$vB))]>; def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB), "vextractqm $rD, $vB", IIC_VecGeneral, - []>; + [(set i32:$rD, + (int_ppc_altivec_vextractqm v1i128:$vB))]>; def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB), "vexpandbm $vD, $vB", IIC_VecGeneral, []>; diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll new file mode 100644 index 00000000000000..2b1cf27c20ec9a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s + +; This test case aims to test the vector mask manipulation operations +; on Power10. 
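+;
+; Each of the vextract*m instructions gathers the most-significant bit of
+; every element of the source vector into a GPR mask, one bit per element
+; (e.g. vextractbm forms a 16-bit mask from the 16 byte elements).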
+
+declare i32 @llvm.ppc.altivec.vextractbm(<16 x i8>)
+declare i32 @llvm.ppc.altivec.vextracthm(<8 x i16>)
+declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
+declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
+declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
+
+define i32 @test_vextractbm(<16 x i8> %a) {
+; CHECK-LABEL: test_vextractbm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractbm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractbm(<16 x i8> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextracthm(<8 x i16> %a) {
+; CHECK-LABEL: test_vextracthm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextracthm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextracthm(<8 x i16> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractwm(<4 x i32> %a) {
+; CHECK-LABEL: test_vextractwm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractwm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractwm(<4 x i32> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractdm(<2 x i64> %a) {
+; CHECK-LABEL: test_vextractdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractdm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractdm(<2 x i64> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractqm(<1 x i128> %a) {
+; CHECK-LABEL: test_vextractqm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractqm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a)
+  ret i32 %ext
+}

From a52173a3e56553d7b795bcf3cdadcf6433117107 Mon Sep 17 00:00:00 2001
From: Harmen Stoppels
Date: Mon, 17 Aug 2020 19:51:11 -0700
Subject: [PATCH 004/101] Use find_library for ncurses

Currently it is hard to avoid having LLVM link to the system install of
ncurses, since it uses check_library_exists to find e.g. libtinfo and
not find_library or find_package.

With this change the ncurses lib is found with find_library, which also
considers CMAKE_PREFIX_PATH. This solves an issue for the spack package
manager, where we want to use the ncurses installed by spack, and spack
provides the CMAKE_PREFIX_PATH for it.

This is a similar change to https://reviews.llvm.org/D79219, which just
landed in master.

Differential revision: https://reviews.llvm.org/D85820
---
 compiler-rt/cmake/config-ix.cmake             | 21 +++++++++----------
 compiler-rt/lib/xray/tests/CMakeLists.txt     |  2 +-
 lldb/source/Core/CMakeLists.txt               |  4 ++--
 llvm/cmake/config-ix.cmake                    | 21 ++++++++-----------
 llvm/include/llvm/Config/config.h.cmake       |  2 +-
 llvm/lib/Support/CMakeLists.txt               |  6 ++----
 llvm/lib/Support/Unix/Process.inc             |  6 +++---
 .../llvm/include/llvm/Config/BUILD.gn         |  4 ++--
 8 files changed, 30 insertions(+), 36 deletions(-)

diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 5f9e868de5fd81..c9d0da2fc08936 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -133,17 +133,16 @@ check_library_exists(pthread pthread_create "" COMPILER_RT_HAS_LIBPTHREAD)
 check_library_exists(execinfo backtrace "" COMPILER_RT_HAS_LIBEXECINFO)
 
 # Look for terminfo library, used in unittests that depend on LLVMSupport.
-if(LLVM_ENABLE_TERMINFO) - foreach(library terminfo tinfo curses ncurses ncursesw) - string(TOUPPER ${library} library_suffix) - check_library_exists( - ${library} setupterm "" COMPILER_RT_HAS_TERMINFO_${library_suffix}) - if(COMPILER_RT_HAS_TERMINFO_${library_suffix}) - set(COMPILER_RT_HAS_TERMINFO TRUE) - set(COMPILER_RT_TERMINFO_LIB "${library}") - break() - endif() - endforeach() +if(LLVM_ENABLE_TERMINFO STREQUAL FORCE_ON) + set(MAYBE_REQUIRED REQUIRED) +else() + set(MAYBE_REQUIRED) +endif() +find_library(COMPILER_RT_TERMINFO_LIB NAMES terminfo tinfo curses ncurses ncursesw ${MAYBE_REQUIRED}) +if(COMPILER_RT_TERMINFO_LIB) + set(LLVM_ENABLE_TERMINFO 1) +else() + set(LLVM_ENABLE_TERMINFO 0) endif() if (ANDROID AND COMPILER_RT_HAS_LIBDL) diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt index a1fbccaeb6d268..96a9db1ef87773 100644 --- a/compiler-rt/lib/xray/tests/CMakeLists.txt +++ b/compiler-rt/lib/xray/tests/CMakeLists.txt @@ -55,7 +55,7 @@ set(XRAY_UNITTEST_LINK_FLAGS if (NOT APPLE) # Needed by LLVMSupport. append_list_if( - COMPILER_RT_HAS_TERMINFO + LLVM_ENABLE_TERMINFO -l${COMPILER_RT_TERMINFO_LIB} XRAY_UNITTEST_LINK_FLAGS) if (COMPILER_RT_STANDALONE_BUILD) diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index a4057d11077f39..01a25045081f9f 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -11,8 +11,8 @@ set(LLDB_LIBEDIT_LIBS) if (LLDB_ENABLE_CURSES) list(APPEND LLDB_CURSES_LIBS ${CURSES_LIBRARIES} ${PANEL_LIBRARIES}) - if(LLVM_ENABLE_TERMINFO AND HAVE_TERMINFO) - list(APPEND LLDB_CURSES_LIBS ${TERMINFO_LIBS}) + if(LLVM_ENABLE_TERMINFO) + list(APPEND LLDB_CURSES_LIBS ${TERMINFO_LIB}) endif() if (LLVM_BUILD_STATIC) list(APPEND LLDB_CURSES_LIBS gpm) diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 2c20a1afeec01d..67e2eb1cec1433 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -147,19 +147,16 @@ if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") else() set(HAVE_LIBEDIT 0) endif() - if(LLVM_ENABLE_TERMINFO) - set(HAVE_TERMINFO 0) - foreach(library terminfo tinfo curses ncurses ncursesw) - string(TOUPPER ${library} library_suffix) - check_library_exists(${library} setupterm "" HAVE_TERMINFO_${library_suffix}) - if(HAVE_TERMINFO_${library_suffix}) - set(HAVE_TERMINFO 1) - set(TERMINFO_LIBS "${library}") - break() - endif() - endforeach() + if(LLVM_ENABLE_TERMINFO STREQUAL FORCE_ON) + set(MAYBE_REQUIRED REQUIRED) + else() + set(MAYBE_REQUIRED) + endif() + find_library(TERMINFO_LIB NAMES terminfo tinfo curses ncurses ncursesw ${MAYBE_REQUIRED}) + if(TERMINFO_LIB) + set(LLVM_ENABLE_TERMINFO 1) else() - set(HAVE_TERMINFO 0) + set(LLVM_ENABLE_TERMINFO 0) endif() find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index b8c7e070eb3416..a65947bf24c43c 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -209,7 +209,7 @@ #cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H} /* Define if the setupterm() function is supported this platform. */ -#cmakedefine HAVE_TERMINFO ${HAVE_TERMINFO} +#cmakedefine LLVM_ENABLE_TERMINFO ${LLVM_ENABLE_TERMINFO} /* Define if the xar_open() function is supported this platform. 
*/ #cmakedefine HAVE_LIBXAR ${HAVE_LIBXAR} diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 7b45dc628160e0..b895f02a9df77a 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -21,10 +21,8 @@ elseif( CMAKE_HOST_UNIX ) STRING(REGEX REPLACE "^lib" "" Backtrace_LIBFILE ${Backtrace_LIBFILE}) set(system_libs ${system_libs} ${Backtrace_LIBFILE}) endif() - if(LLVM_ENABLE_TERMINFO) - if(HAVE_TERMINFO) - set(system_libs ${system_libs} ${TERMINFO_LIBS}) - endif() + if( LLVM_ENABLE_TERMINFO ) + set(system_libs ${system_libs} ${TERMINFO_LIB}) endif() if( LLVM_ENABLE_THREADS AND (HAVE_LIBATOMIC OR HAVE_CXX_LIBATOMICS64) ) set(system_libs ${system_libs} atomic) diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc index 24f16b51af7be9..7425d084da27af 100644 --- a/llvm/lib/Support/Unix/Process.inc +++ b/llvm/lib/Support/Unix/Process.inc @@ -313,7 +313,7 @@ unsigned Process::StandardErrColumns() { return getColumns(); } -#ifdef HAVE_TERMINFO +#ifdef LLVM_ENABLE_TERMINFO // We manually declare these extern functions because finding the correct // headers from various terminfo, curses, or other sources is harder than // writing their specs down. @@ -323,12 +323,12 @@ extern "C" int del_curterm(struct term *termp); extern "C" int tigetnum(char *capname); #endif -#ifdef HAVE_TERMINFO +#ifdef LLVM_ENABLE_TERMINFO static ManagedStatic TermColorMutex; #endif static bool terminalHasColors(int fd) { -#ifdef HAVE_TERMINFO +#ifdef LLVM_ENABLE_TERMINFO // First, acquire a global lock because these C routines are thread hostile. std::lock_guard G(*TermColorMutex); diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index c9e7c45fc118ba..35c6890efd6da9 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -284,9 +284,9 @@ write_cmake_config("config") { } if (llvm_enable_terminfo) { - values += [ "HAVE_TERMINFO=1" ] + values += [ "LLVM_ENABLE_TERMINFO=1" ] } else { - values += [ "HAVE_TERMINFO=" ] + values += [ "LLVM_ENABLE_TERMINFO=" ] } if (llvm_enable_dia_sdk) { From 15673d748acd8f26bdeee18c0aa18f44c775d738 Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Sun, 16 Aug 2020 18:22:04 -0400 Subject: [PATCH 005/101] [clangd] Index refs to main-file symbols as well Summary: This will be needed to support call hierarchy Reviewers: kadircet Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D83536 --- clang-tools-extra/clangd/ClangdServer.cpp | 3 +- clang-tools-extra/clangd/ClangdServer.h | 3 + clang-tools-extra/clangd/index/Background.cpp | 2 + clang-tools-extra/clangd/index/Background.h | 3 + clang-tools-extra/clangd/index/FileIndex.cpp | 21 ++++--- clang-tools-extra/clangd/index/FileIndex.h | 5 +- .../clangd/index/SymbolCollector.cpp | 5 +- .../clangd/index/SymbolCollector.h | 2 + clang-tools-extra/clangd/tool/ClangdMain.cpp | 8 +++ .../clangd/unittests/BackgroundIndexTests.cpp | 55 +++++++++++++++++++ .../clangd/unittests/SymbolCollectorTests.cpp | 25 +++++++-- 11 files changed, 112 insertions(+), 20 deletions(-) diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index 74ab21a5f7788f..d204e87c143b42 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -173,7 +173,8 @@ 
ClangdServer::ClangdServer(const GlobalCompilationDatabase &CDB, Callbacks *Callbacks) : ConfigProvider(Opts.ConfigProvider), TFS(TFS), DynamicIdx(Opts.BuildDynamicSymbolIndex - ? new FileIndex(Opts.HeavyweightDynamicSymbolIndex) + ? new FileIndex(Opts.HeavyweightDynamicSymbolIndex, + Opts.CollectMainFileRefs) : nullptr), GetClangTidyOptions(Opts.GetClangTidyOptions), SuggestMissingIncludes(Opts.SuggestMissingIncludes), diff --git a/clang-tools-extra/clangd/ClangdServer.h b/clang-tools-extra/clangd/ClangdServer.h index 1bc7d70eebaddc..7068cd5eb42179 100644 --- a/clang-tools-extra/clangd/ClangdServer.h +++ b/clang-tools-extra/clangd/ClangdServer.h @@ -111,6 +111,9 @@ class ClangdServer { /// on background threads. The index is stored in the project root. bool BackgroundIndex = false; + /// Store refs to main-file symbols in the index. + bool CollectMainFileRefs = false; + /// If set, use this index to augment code completion results. SymbolIndex *StaticIndex = nullptr; diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index 18037d694c11ed..2bac6ec39d308b 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -95,6 +95,7 @@ BackgroundIndex::BackgroundIndex( BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) : SwapIndex(std::make_unique()), TFS(TFS), CDB(CDB), ContextProvider(std::move(Opts.ContextProvider)), + CollectMainFileRefs(Opts.CollectMainFileRefs), Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), IndexStorageFactory(std::move(IndexStorageFactory)), Queue(std::move(Opts.OnProgress)), @@ -301,6 +302,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { return false; // Skip files that haven't changed, without errors. return true; }; + IndexOpts.CollectMainFileRefs = CollectMainFileRefs; IndexFileIn Index; auto Action = createStaticIndexingAction( diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h index 72fe84466959fe..472603013a53ac 100644 --- a/clang-tools-extra/clangd/index/Background.h +++ b/clang-tools-extra/clangd/index/Background.h @@ -137,6 +137,8 @@ class BackgroundIndex : public SwapIndex { // file. Called with the empty string for other tasks. // (When called, the context from BackgroundIndex construction is active). std::function ContextProvider = nullptr; + // Whether to collect references to main-file-only symbols. + bool CollectMainFileRefs = false; }; /// Creates a new background index and starts its threads. 
@@ -188,6 +190,7 @@ class BackgroundIndex : public SwapIndex { const ThreadsafeFS &TFS; const GlobalCompilationDatabase &CDB; std::function ContextProvider; + bool CollectMainFileRefs; llvm::Error index(tooling::CompileCommand); diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 5f84545d7c73d1..dafec6742c2ca0 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -47,12 +47,13 @@ SlabTuple indexSymbols(ASTContext &AST, std::shared_ptr PP, llvm::ArrayRef DeclsToIndex, const MainFileMacros *MacroRefsToIndex, const CanonicalIncludes &Includes, bool IsIndexMainAST, - llvm::StringRef Version) { + llvm::StringRef Version, bool CollectMainFileRefs) { SymbolCollector::Options CollectorOpts; CollectorOpts.CollectIncludePath = true; CollectorOpts.Includes = &Includes; CollectorOpts.CountReferences = false; CollectorOpts.Origin = SymbolOrigin::Dynamic; + CollectorOpts.CollectMainFileRefs = CollectMainFileRefs; index::IndexingOptions IndexOpts; // We only need declarations, because we don't count references. @@ -205,11 +206,11 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const { return std::move(IF); } -SlabTuple indexMainDecls(ParsedAST &AST) { - return indexSymbols(AST.getASTContext(), AST.getPreprocessorPtr(), - AST.getLocalTopLevelDecls(), &AST.getMacros(), - AST.getCanonicalIncludes(), - /*IsIndexMainAST=*/true, AST.version()); +SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs) { + return indexSymbols( + AST.getASTContext(), AST.getPreprocessorPtr(), + AST.getLocalTopLevelDecls(), &AST.getMacros(), AST.getCanonicalIncludes(), + /*IsIndexMainAST=*/true, AST.version(), CollectMainFileRefs); } SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, @@ -220,7 +221,8 @@ SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST, AST.getTranslationUnitDecl()->decls().end()); return indexSymbols(AST, std::move(PP), DeclsToIndex, /*MainFileMacros=*/nullptr, Includes, - /*IsIndexMainAST=*/false, Version); + /*IsIndexMainAST=*/false, Version, + /*CollectMainFileRefs=*/false); } void FileSymbols::update(llvm::StringRef Key, @@ -371,8 +373,9 @@ FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle, llvm_unreachable("Unknown clangd::IndexType"); } -FileIndex::FileIndex(bool UseDex) +FileIndex::FileIndex(bool UseDex, bool CollectMainFileRefs) : MergedIndex(&MainFileIndex, &PreambleIndex), UseDex(UseDex), + CollectMainFileRefs(CollectMainFileRefs), PreambleIndex(std::make_unique()), MainFileIndex(std::make_unique()) {} @@ -415,7 +418,7 @@ void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version, } void FileIndex::updateMain(PathRef Path, ParsedAST &AST) { - auto Contents = indexMainDecls(AST); + auto Contents = indexMainDecls(AST, CollectMainFileRefs); MainFileSymbols.update( Path, std::make_unique(std::move(std::get<0>(Contents))), std::make_unique(std::move(std::get<1>(Contents))), diff --git a/clang-tools-extra/clangd/index/FileIndex.h b/clang-tools-extra/clangd/index/FileIndex.h index e6f8d1ef9e3d71..c7bc855bcb8e56 100644 --- a/clang-tools-extra/clangd/index/FileIndex.h +++ b/clang-tools-extra/clangd/index/FileIndex.h @@ -104,7 +104,7 @@ class FileSymbols { /// FIXME: Expose an interface to remove files that are closed. 
 class FileIndex : public MergedIndex {
 public:
-  FileIndex(bool UseDex = true);
+  FileIndex(bool UseDex = true, bool CollectMainFileRefs = false);
 
   /// Update preamble symbols of file \p Path with all declarations in \p AST
   /// and macros in \p PP.
@@ -118,6 +118,7 @@ class FileIndex : public MergedIndex {
 private:
   bool UseDex; // FIXME: this should be always on.
+  bool CollectMainFileRefs;
 
   // Contains information from each file's preamble only. Symbols and relations
   // are sharded per declaration file to deduplicate multiple symbols and reduce
@@ -152,7 +153,7 @@ using SlabTuple = std::tuple<SymbolSlab, RefSlab, RelationSlab>;
 /// Retrieves symbols and refs of local top level decls in \p AST (i.e.
 /// `AST.getLocalTopLevelDecls()`).
 /// Exposed to assist in unit tests.
-SlabTuple indexMainDecls(ParsedAST &AST);
+SlabTuple indexMainDecls(ParsedAST &AST, bool CollectMainFileRefs = false);
 
 /// Index declarations from \p AST and macros from \p PP that are declared in
 /// included headers.
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index a3ceaa388cf9db..2e1f261ab18aee 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -334,12 +334,13 @@ bool SymbolCollector::handleDeclOccurrence(
   if (IsOnlyRef && !CollectRef)
     return true;
 
-  // Do not store references to main-file symbols.
   // Unlike other fields, e.g. Symbols (which use spelling locations), we use
   // file locations for references (as it aligns the behavior of clangd's
   // AST-based xref).
   // FIXME: we should try to use the file locations for other fields.
-  if (CollectRef && (!IsMainFileOnly || ND->isExternallyVisible()) &&
+  if (CollectRef &&
+      (!IsMainFileOnly || Opts.CollectMainFileRefs ||
+       ND->isExternallyVisible()) &&
       !isa<NamespaceDecl>(ND) &&
      (Opts.RefsInHeaders ||
       SM.getFileID(SM.getFileLoc(Loc)) == SM.getMainFileID()))
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h
index f66a71c2d59b10..9b30aeba95383c 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.h
+++ b/clang-tools-extra/clangd/index/SymbolCollector.h
@@ -78,6 +78,8 @@ class SymbolCollector : public index::IndexDataConsumer {
     /// Collect symbols local to main-files, such as static functions
     /// and symbols inside an anonymous namespace.
     bool CollectMainFileSymbols = true;
+    /// Collect references to main-file symbols.
+    bool CollectMainFileRefs = false;
     /// If set to true, SymbolCollector will collect doc for all symbols.
     /// Note that documents of symbols being indexed for completion will always
     /// be collected regardless of this option.
diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp
index 3d83f3652f3003..57dac600014d5e 100644
--- a/clang-tools-extra/clangd/tool/ClangdMain.cpp
+++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp
@@ -450,6 +450,13 @@ opt<bool> EnableConfig{
     init(true),
 };
 
+opt<bool> CollectMainFileRefs{
+    "collect-main-file-refs",
+    cat(Misc),
+    desc("Store references to main-file-only symbols in the index"),
+    init(false),
+};
+
 #if CLANGD_ENABLE_REMOTE
 opt<std::string> RemoteIndexAddress{
     "remote-index-address",
@@ -682,6 +689,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var
   if (!ResourceDir.empty())
     Opts.ResourceDir = ResourceDir;
   Opts.BuildDynamicSymbolIndex = EnableIndex;
+  Opts.CollectMainFileRefs = CollectMainFileRefs;
   std::unique_ptr<SymbolIndex> StaticIdx;
   std::future<void> AsyncIndexLoad; // Block exit while loading the index.
   if (EnableIndex && !IndexFile.empty()) {
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index 06614872363f54..f9f584e8895f52 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -229,6 +229,61 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) {
                            FileURI("unittest:///root/B.cc")}));
 }

+TEST_F(BackgroundIndexTest, MainFileRefs) {
+  MockFS FS;
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      void header_sym();
+      )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nstatic void main_sym() { (void)header_sym; }";
+
+  // Check the behaviour with CollectMainFileRefs = false (the default).
+  {
+    llvm::StringMap<std::string> Storage;
+    size_t CacheHits = 0;
+    MemoryShardStorage MSS(Storage, CacheHits);
+    OverlayCDB CDB(/*Base=*/nullptr);
+    BackgroundIndex Idx(FS, CDB, [&](llvm::StringRef) { return &MSS; },
+                        /*Opts=*/{});
+
+    tooling::CompileCommand Cmd;
+    Cmd.Filename = testPath("root/A.cc");
+    Cmd.Directory = testPath("root");
+    Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+    CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+
+    ASSERT_TRUE(Idx.blockUntilIdleForTest());
+    EXPECT_THAT(
+        runFuzzyFind(Idx, ""),
+        UnorderedElementsAre(AllOf(Named("header_sym"), NumReferences(1U)),
+                             AllOf(Named("main_sym"), NumReferences(0U))));
+  }
+
+  // Check the behaviour with CollectMainFileRefs = true.
+  {
+    llvm::StringMap<std::string> Storage;
+    size_t CacheHits = 0;
+    MemoryShardStorage MSS(Storage, CacheHits);
+    OverlayCDB CDB(/*Base=*/nullptr);
+    BackgroundIndex::Options Opts;
+    Opts.CollectMainFileRefs = true;
+    BackgroundIndex Idx(
+        FS, CDB, [&](llvm::StringRef) { return &MSS; }, Opts);
+
+    tooling::CompileCommand Cmd;
+    Cmd.Filename = testPath("root/A.cc");
+    Cmd.Directory = testPath("root");
+    Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+    CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+
+    ASSERT_TRUE(Idx.blockUntilIdleForTest());
+    EXPECT_THAT(
+        runFuzzyFind(Idx, ""),
+        UnorderedElementsAre(AllOf(Named("header_sym"), NumReferences(1U)),
+                             AllOf(Named("main_sym"), NumReferences(1U))));
+  }
+}
+
 TEST_F(BackgroundIndexTest, ShardStorageTest) {
   MockFS FS;
   FS.Files[testPath("root/A.h")] = R"cpp(
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 70a8e6832d02f0..d89db8f015cea0 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -714,7 +714,6 @@ TEST_F(SymbolCollectorTest, Refs) {
   EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(Symbols, "NS").ID, _))));
   EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "MACRO").ID,
                                   HaveRanges(Main.ranges("macro")))));
-  // Symbols *only* in the main file:
   // - (a, b) externally visible and should have refs.
   // - (c, FUNC) externally invisible and had no refs collected.
   auto MainSymbols =
@@ -723,6 +722,20 @@ TEST_F(SymbolCollectorTest, Refs) {
   EXPECT_THAT(Refs, Contains(Pair(findSymbol(MainSymbols, "b").ID, _)));
   EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "c").ID, _))));
   EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(MainSymbols, "FUNC").ID, _))));
+
+  // Run the collector again with CollectMainFileRefs = true.
+  // We need to recreate InMemoryFileSystem because runSymbolCollector()
+  // calls MemoryBuffer::getMemBuffer(), which makes the buffers unusable
+  // after runSymbolCollector() exits.
+  InMemoryFileSystem = new llvm::vfs::InMemoryFileSystem();
+  CollectorOpts.CollectMainFileRefs = true;
+  runSymbolCollector(Header.code(),
+                     (Main.code() + SymbolsOnlyInMainCode.code()).str());
+  EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "a").ID, _)));
+  EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "b").ID, _)));
+  EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "c").ID, _)));
+  // However, references to main-file macros are not collected.
+  EXPECT_THAT(Refs, Not(Contains(Pair(findSymbol(Symbols, "FUNC").ID, _))));
 }

 TEST_F(SymbolCollectorTest, MacroRefInHeader) {
@@ -908,8 +921,9 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) {
       $Foo[[Foo]] fo;
     }
   )");
-  // The main file is normal .cpp file, we should collect the refs
-  // for externally visible symbols.
+  // We should collect refs to main-file symbols in all cases:
+
+  // 1. The main file is a normal .cpp file.
   TestFileName = testPath("foo.cpp");
   runSymbolCollector("", Header.code());
   EXPECT_THAT(Refs,
@@ -918,7 +932,7 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) {
                    Pair(findSymbol(Symbols, "Func").ID,
                         HaveRanges(Header.ranges("Func")))));

-  // Run the .h file as main file, we should collect the refs.
+  // 2. Run the .h file as main file.
   TestFileName = testPath("foo.h");
   runSymbolCollector("", Header.code(),
                      /*ExtraArgs=*/{"-xobjective-c++-header"});
@@ -929,8 +943,7 @@ TEST_F(SymbolCollectorTest, HeaderAsMainFile) {
                    Pair(findSymbol(Symbols, "Func").ID,
                         HaveRanges(Header.ranges("Func")))));

-  // Run the .hh file as main file (without "-x c++-header"), we should collect
-  // the refs as well.
+  // 3. Run the .hh file as main file (without "-x c++-header").
   TestFileName = testPath("foo.hh");
   runSymbolCollector("", Header.code());
   EXPECT_THAT(Symbols, UnorderedElementsAre(QName("Foo"), QName("Func")));

From 00d7b7d014f90aaaacaef6f9c778614b09356bf0 Mon Sep 17 00:00:00 2001
From: Nathan Ridge
Date: Sun, 26 Jul 2020 22:45:24 -0400
Subject: [PATCH 006/101] [clang] Fix visitation of ConceptSpecializationExpr
 in constrained-parameter

Summary: RecursiveASTVisitor needs to traverse
TypeConstraint::ImmediatelyDeclaredConstraint

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D84136
---
 .../clangd/unittests/FindTargetTests.cpp      | 22 +++++++++
 clang/include/clang/AST/RecursiveASTVisitor.h | 13 +++++-
 clang/unittests/Tooling/CMakeLists.txt        |  1 +
 .../RecursiveASTVisitorTests/Concept.cpp      | 45 +++++++++++++++++++
 4 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp

diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
index 4c655c3338d203..2507932c5cda30 100644
--- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp
@@ -442,6 +442,28 @@ TEST_F(TargetDeclTest, Concept) {
   )cpp";
   EXPECT_DECLS("ConceptSpecializationExpr",
                {"template <typename T> concept Fooable = true;"});
+
+  // constrained-parameter
+  Code = R"cpp(
+    template <typename T>
+    concept Fooable = true;
+
+    template <[[Fooable]] T>
+    void bar(T t);
+  )cpp";
+  EXPECT_DECLS("ConceptSpecializationExpr",
+               {"template <typename T> concept Fooable = true;"});
+
+  // partial-concept-id
+  Code = R"cpp(
+    template <typename T, typename U>
+    concept Fooable = true;
+
+    template <[[Fooable]]<int> T>
+    void bar(T t);
+  )cpp";
+  EXPECT_DECLS("ConceptSpecializationExpr",
+               {"template <typename T, typename U> concept Fooable = true;"});
 }

 TEST_F(TargetDeclTest,
FunctionTemplate) {
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 3dcfc9fee629ac..6f07b92f253230 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -1777,8 +1777,17 @@ DEF_TRAVERSE_DECL(TemplateTypeParmDecl, {
   // D is the "T" in something like "template <typename T> class vector;"
   if (D->getTypeForDecl())
     TRY_TO(TraverseType(QualType(D->getTypeForDecl(), 0)));
-  if (const auto *TC = D->getTypeConstraint())
-    TRY_TO(TraverseConceptReference(*TC));
+  if (const auto *TC = D->getTypeConstraint()) {
+    if (Expr *IDC = TC->getImmediatelyDeclaredConstraint()) {
+      TRY_TO(TraverseStmt(IDC));
+    } else {
+      // Avoid traversing the ConceptReference in the TypeConstraint
+      // if we have an immediately-declared-constraint, otherwise
+      // we'll end up visiting the concept and the arguments in
+      // the TC twice.
+      TRY_TO(TraverseConceptReference(*TC));
+    }
+  }
   if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited())
     TRY_TO(TraverseTypeLoc(D->getDefaultArgumentInfo()->getTypeLoc()));
 })
diff --git a/clang/unittests/Tooling/CMakeLists.txt b/clang/unittests/Tooling/CMakeLists.txt
index f290c3d2bedee8..9de330ab73d425 100644
--- a/clang/unittests/Tooling/CMakeLists.txt
+++ b/clang/unittests/Tooling/CMakeLists.txt
@@ -22,6 +22,7 @@ add_clang_unittest(ToolingTests
   RecursiveASTVisitorTests/Attr.cpp
   RecursiveASTVisitorTests/Callbacks.cpp
   RecursiveASTVisitorTests/Class.cpp
+  RecursiveASTVisitorTests/Concept.cpp
   RecursiveASTVisitorTests/ConstructExpr.cpp
   RecursiveASTVisitorTests/CXXBoolLiteralExpr.cpp
   RecursiveASTVisitorTests/CXXMemberCall.cpp
diff --git a/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp b/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp
new file mode 100644
index 00000000000000..f0f700204dd5a9
--- /dev/null
+++ b/clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp
@@ -0,0 +1,45 @@
+//===- unittest/Tooling/RecursiveASTVisitorTests/Concept.cpp----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestVisitor.h"
+#include "clang/AST/ExprConcepts.h"
+
+using namespace clang;
+
+namespace {
+
+struct ConceptVisitor : ExpectedLocationVisitor<ConceptVisitor> {
+  bool VisitConceptSpecializationExpr(ConceptSpecializationExpr *E) {
+    ++ConceptSpecializationExprsVisited;
+    return true;
+  }
+  bool TraverseConceptReference(const ConceptReference &R) {
+    ++ConceptReferencesTraversed;
+    return true;
+  }
+
+  int ConceptSpecializationExprsVisited = 0;
+  int ConceptReferencesTraversed = 0;
+};
+
+TEST(RecursiveASTVisitor, ConstrainedParameter) {
+  ConceptVisitor Visitor;
+  EXPECT_TRUE(Visitor.runOver("template <typename T> concept Fooable = true;\n"
+                              "template <Fooable T> void bar(T);",
+                              ConceptVisitor::Lang_CXX2a));
+  // Check that we visit the "Fooable T" template parameter's TypeConstraint's
+  // ImmediatelyDeclaredConstraint, which is a ConceptSpecializationExpr.
+  EXPECT_EQ(1, Visitor.ConceptSpecializationExprsVisited);
+  // There are two ConceptReference objects in the AST: the base subobject
+  // of the ConceptSpecializationExpr, and the base subobject of the
+  // TypeConstraint itself. To avoid traversing the concept and arguments
+  // multiple times, we only traverse one.
+  EXPECT_EQ(1, Visitor.ConceptReferencesTraversed);
+}
+
+} // end anonymous namespace

From b27bdf955a74e1050645ef5482498a834e9dfc1e Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Mon, 17 Aug 2020 19:54:42 -0500
Subject: [PATCH 007/101] [Attributor][FIX] Handle function pointers properly
 in AANonNull

Before we tried to create a dominator tree for a declaration when we
wanted to determine if the function pointer is `nonnull`. We now avoid
looking at global values if `Value::getPointerDereferenceableBytes` has
not already determined `nonnull`.
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 39 +++++++---
 .../IPConstantProp/openmp_parallel_for.ll     |  4 +-
 llvm/test/Transforms/Attributor/callbacks.ll  | 20 ++---
 llvm/test/Transforms/Attributor/liveness.ll   | 34 +++++---
 llvm/test/Transforms/Attributor/misc.ll       | 77 ++++++-------------
 llvm/test/Transforms/Attributor/nonnull.ll    | 31 ++++++++
 6 files changed, 119 insertions(+), 86 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 762d4a11151551..13f56ec9d50d87 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1687,21 +1687,33 @@ struct AANonNullImpl : AANonNull {
     Value &V = getAssociatedValue();
     if (!NullIsDefined &&
         hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
-                /* IgnoreSubsumingPositions */ false, &A))
+                /* IgnoreSubsumingPositions */ false, &A)) {
       indicateOptimisticFixpoint();
-    else if (isa<ConstantPointerNull>(V))
+      return;
+    }
+
+    if (isa<ConstantPointerNull>(V)) {
       indicatePessimisticFixpoint();
-    else
-      AANonNull::initialize(A);
+      return;
+    }
+
+    AANonNull::initialize(A);

     bool CanBeNull = true;
-    if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull))
-      if (!CanBeNull)
+    if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull)) {
+      if (!CanBeNull) {
         indicateOptimisticFixpoint();
+        return;
+      }
+    }

-    if (!getState().isAtFixpoint())
-      if (Instruction *CtxI = getCtxI())
-        followUsesInMBEC(*this, A, getState(), *CtxI);
+    if (isa<GlobalValue>(&getAssociatedValue())) {
+      indicatePessimisticFixpoint();
+      return;
+    }
+
+    if (Instruction *CtxI = getCtxI())
+      followUsesInMBEC(*this, A, getState(), *CtxI);
   }

   /// See followUsesInMBEC
@@ -1778,9 +1790,14 @@ struct AANonNullFloating : public AANonNullImpl {

 /// NonNull attribute for function return value.
 struct AANonNullReturned final
-    : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl> {
+    : AAReturnedFromReturnedValues<AANonNull, AANonNull> {
   AANonNullReturned(const IRPosition &IRP, Attributor &A)
-      : AAReturnedFromReturnedValues<AANonNull, AANonNullImpl>(IRP, A) {}
+      : AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return getAssumed() ?
"nonnull" : "may-null"; + } /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) } diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll index 83990224bddae0..137193b972ca6c 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -36,7 +36,7 @@ define dso_local void @foo(i32 %N) { ; IS__TUNIT_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo @@ -47,7 +47,7 @@ define dso_local void @foo(i32 %N) { ; IS__TUNIT_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 ; IS__TUNIT_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4 ; IS__TUNIT_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4 -; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) +; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@foo diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index 951f9830ae59e5..f1dfacea8a96ef 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -25,7 +25,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -37,7 +37,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller @@ -124,7 +124,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -136,7 +136,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -224,7 +224,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -236,7 +236,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -324,8 +324,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -337,8 +337,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index 2a24a168263c6e..57017c50af521a 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1785,17 +1785,29 @@ define internal void @call_via_pointer_with_dead_args_internal_b(i32* %a, i32* % ret void } define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) { -; CHECK-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller -; CHECK-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) -; CHECK-NEXT: [[PTR1:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR2:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR3:%.*]] = alloca i32, align 128 -; CHECK-NEXT: [[PTR4:%.*]] = alloca i32, align 128 -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_1) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; CHECK-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_2) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) +; NOT_CGSCC_NPM-NEXT: [[PTR1:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR2:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR3:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: [[PTR4:%.*]] = alloca i32, align 128 +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 
dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer_internal_1) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer_internal_2) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) +; IS__CGSCC____-NEXT: [[PTR1:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: [[PTR2:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: [[PTR3:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: [[PTR4:%.*]] = alloca i32, align 128 +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_1) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_2) +; IS__CGSCC____-NEXT: ret void ; %ptr1 = alloca i32, align 128 %ptr2 = alloca i32, align 128 diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 46a0449e5be6e8..80a6948ca6dc4c 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -9,31 +9,18 @@ define internal void @internal(void (i8*)* %fp) { ; ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@internal -; IS__TUNIT____-SAME: (void (i8*)* nonnull [[FP:%.*]]) -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__TUNIT____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__TUNIT____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__TUNIT____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__TUNIT____-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@internal -; IS__CGSCC____-SAME: (void (i8*)* nonnull [[FP:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 
dereferenceable(4) [[A]]) -; IS__CGSCC____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__CGSCC____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__CGSCC____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@internal +; CHECK-SAME: (void (i8*)* nonnull [[FP:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) +; CHECK-NEXT: call void @callback1(void (i32*)* nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void [[FP]](i8* [[TMP1]]) +; CHECK-NEXT: ret void ; entry: %a = alloca i32, align 4 @@ -51,33 +38,19 @@ entry: define void @external(void (i8*)* %fp) { ; ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@external -; IS__TUNIT____-SAME: (void (i8*)* [[FP:%.*]]) -; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__TUNIT____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* nonnull bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__TUNIT____-NEXT: call void @callback2(void (i8*)* [[FP]]) -; IS__TUNIT____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__TUNIT____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__TUNIT____-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) -; IS__TUNIT____-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@external -; IS__CGSCC____-SAME: (void (i8*)* [[FP:%.*]]) -; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; IS__CGSCC____-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) -; IS__CGSCC____-NEXT: call void @callback2(void (i8*)* [[FP]]) -; IS__CGSCC____-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* -; IS__CGSCC____-NEXT: call void [[FP]](i8* [[TMP1]]) -; IS__CGSCC____-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@external +; CHECK-SAME: (void (i8*)* [[FP:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @callback1(void (i32*)* nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @callback2(void (i8*)* [[FP]]) +; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void [[FP]](i8* [[TMP1]]) +; CHECK-NEXT: call void @internal(void (i8*)* nonnull [[FP]]) +; CHECK-NEXT: ret void ; entry: %a = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 9290d32453bada..4add5a5c1f5a89 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -1364,5 +1364,36 @@ define void @nonnull_assume_neg(i8* %arg) { declare void @use_i8_ptr(i8* nofree nocapture readnone) nounwind declare void @use_i8_ptr_ret(i8* nofree nocapture readnone) nounwind willreturn +define i8* @nonnull_function_ptr_1() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nonnull_function_ptr_1() +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* ()* @nonnull_function_ptr_1 to i8* +; IS__TUNIT____-NEXT: ret i8* [[BC]] +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@nonnull_function_ptr_1() +; IS__CGSCC____-NEXT: [[BC:%.*]] = bitcast i8* ()* @nonnull_function_ptr_1 to i8* +; IS__CGSCC____-NEXT: ret i8* [[BC]] +; + %bc = bitcast i8*()* @nonnull_function_ptr_1 to i8* + ret i8* %bc +} + +declare i8* @function_decl() +define i8* @nonnull_function_ptr_2() { +; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@nonnull_function_ptr_2() +; IS__TUNIT____-NEXT: [[BC:%.*]] = bitcast i8* ()* @function_decl to i8* +; IS__TUNIT____-NEXT: ret i8* [[BC]] +; +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@nonnull_function_ptr_2() +; IS__CGSCC____-NEXT: [[BC:%.*]] = bitcast i8* ()* @function_decl to i8* +; IS__CGSCC____-NEXT: ret i8* [[BC]] +; + %bc = bitcast i8*()* @function_decl to i8* + ret i8* %bc +} + attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} From 858c75f7d19c14002eb81bcbc747bf708f92b1a9 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 1 Aug 2020 01:49:28 -0500 Subject: [PATCH 008/101] [Attributor][NFC] Directly return proper type to avoid casts --- llvm/include/llvm/Transforms/IPO/Attributor.h | 4 +- .../Transforms/IPO/AttributorAttributes.cpp | 58 ++++++------------- 2 files changed, 20 insertions(+), 42 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 27832d0fcc8467..b6c0a17fc3e7fa 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -2013,7 +2013,7 @@ struct StateWrapper : public BaseType, public StateTy { StateType &getState() override { return *this; } /// See AbstractAttribute::getState(...). - const AbstractState &getState() const override { return *this; } + const StateType &getState() const override { return *this; } }; /// Helper class that provides common functionality to manifest IR attributes. @@ -3302,7 +3302,7 @@ struct AAValueConstantRange /// See AbstractAttribute::getState(...). IntegerRangeState &getState() override { return *this; } - const AbstractState &getState() const override { return *this; } + const IntegerRangeState &getState() const override { return *this; } /// Create an abstract attribute view for the position \p IRP. 
  static AAValueConstantRange &createForPosition(const IRPosition &IRP,
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 13f56ec9d50d87..81fc52cb3f1cb0 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -449,7 +449,7 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
     const AAType &AA = A.getAAFor<AAType>(QueryingAA, RVPos);
     LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
                       << " @ " << RVPos << "\n");
-    const StateType &AAS = static_cast<const StateType &>(AA.getState());
+    const StateType &AAS = AA.getState();
     if (T.hasValue())
       *T &= AAS;
     else
@@ -511,7 +511,7 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
     const AAType &AA = A.getAAFor<AAType>(QueryingAA, ACSArgPos);
     LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
                       << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
-    const StateType &AAS = static_cast<const StateType &>(AA.getState());
+    const StateType &AAS = AA.getState();
     if (T.hasValue())
       *T &= AAS;
     else
@@ -568,8 +568,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
     IRPosition FnPos = IRPosition::returned(*AssociatedFunction);
     const AAType &AA = A.getAAFor<AAType>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        S, static_cast<const StateType &>(AA.getState()));
+    return clampStateAndIndicateChange(S, AA.getState());
   }
 };

@@ -749,9 +748,7 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AANoUnwind::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -1401,8 +1398,7 @@ struct AANoSyncCallSite final : AANoSyncImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(), static_cast<const AANoSync::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -1467,8 +1463,7 @@ struct AANoFreeCallSite final : AANoFreeImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(), static_cast<const AANoFree::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -1550,8 +1545,7 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating {
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
     auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos);
-    return clampStateAndIndicateChange(
-        getState(), static_cast<const AANoFree::StateType &>(ArgAA.getState()));
+    return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -1769,8 +1763,7 @@ struct AANonNullFloating : public AANonNullImpl {
         T.indicatePessimisticFixpoint();
       } else {
         // Use abstract attribute information.
-        const AANonNull::StateType &NS =
-            static_cast<const AANonNull::StateType &>(AA.getState());
+        const AANonNull::StateType &NS = AA.getState();
         T ^= NS;
       }
       return T.isValidState();
@@ -1924,9 +1917,7 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AANoRecurse::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -2339,9 +2330,7 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AAWillReturn::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -2735,8 +2724,7 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::returned(*F);
     auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(), static_cast<const AANoAlias::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -2926,8 +2914,7 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
     auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos);
-    return clampStateAndIndicateChange(
-        getState(), static_cast<const AAIsDead::StateType &>(ArgAA.getState()));
+    return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }

   /// See AbstractAttribute::manifest(...).
@@ -3547,7 +3534,7 @@ struct AADereferenceableFloating : AADereferenceableImpl {
         DerefBytes = Base->getPointerDereferenceableBytes(DL, CanBeNull);
         T.GlobalState.indicatePessimisticFixpoint();
       } else {
-        const DerefState &DS = static_cast<const DerefState &>(AA.getState());
+        const DerefState &DS = AA.getState();
         DerefBytes = DS.DerefBytesState.getAssumed();
         T.GlobalState &= DS.GlobalState;
       }
@@ -3829,8 +3816,7 @@ struct AAAlignFloating : AAAlignImpl {
         T.indicatePessimisticFixpoint();
       } else {
         // Use abstract attribute information.
-        const AAAlign::StateType &DS =
-            static_cast<const AAAlign::StateType &>(AA.getState());
+        const AAAlign::StateType &DS = AA.getState();
         T ^= DS;
       }
       return T.isValidState();
@@ -3985,9 +3971,7 @@ struct AANoReturnCallSite final : AANoReturnImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AANoReturn::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -4374,9 +4358,7 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
       return indicatePessimisticFixpoint();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
     auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AANoCapture::StateType &>(ArgAA.getState()));
+    return clampStateAndIndicateChange(getState(), ArgAA.getState());
  }

   /// See AbstractAttribute::trackStatistics()
@@ -5880,9 +5862,7 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
     Argument *Arg = getAssociatedArgument();
     const IRPosition &ArgPos = IRPosition::argument(*Arg);
     auto &ArgAA = A.getAAFor<AAMemoryBehavior>(*this, ArgPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AAMemoryBehavior::StateType &>(ArgAA.getState()));
+    return clampStateAndIndicateChange(getState(), ArgAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()
@@ -5965,9 +5945,7 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
     Function *F = getAssociatedFunction();
     const IRPosition &FnPos = IRPosition::function(*F);
     auto &FnAA = A.getAAFor<AAMemoryBehavior>(*this, FnPos);
-    return clampStateAndIndicateChange(
-        getState(),
-        static_cast<const AAMemoryBehavior::StateType &>(FnAA.getState()));
+    return clampStateAndIndicateChange(getState(), FnAA.getState());
   }

   /// See AbstractAttribute::trackStatistics()

From 1d99c3d7073888517e3557c1b522ed273bbe188c Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sun, 2 Aug 2020 00:31:30 -0500
Subject: [PATCH 009/101] [Attributor] We (should) keep the CG updated so we
 can mark it as preserved
---
 llvm/lib/Transforms/IPO/Attributor.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index fb217cb5f14ee9..54ec7174f9fb8a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -2324,7 +2324,9 @@ PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
   InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
   if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater)) {
     // FIXME: Think about passes we will preserve and add them here.
-    return PreservedAnalyses::none();
+    PreservedAnalyses PA;
+    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+    return PA;
   }
   return PreservedAnalyses::all();
 }

From 8abd69aa9e8c21b9958f531b03ad9801ec850154 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Sun, 2 Aug 2020 00:44:08 -0500
Subject: [PATCH 010/101] [Attributor] Bail early if AAMemoryLocation cannot
 derive anything

Before this change we looked through all memory operations in a
function even if the first was an unknown call that could do anything.
This cost a lot of time, but there was little use in doing so. We also
avoid creating AAs for things we would have looked at only if no other
AA would; that is the reason for the test changes.
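As a sketch of the motivating case (hypothetical IR, not taken from the
test suite): once the first visited instruction is a call to an unknown
function, no memory location can be excluded anymore, so inspecting the
remaining instructions cannot refine the state and we stop:

  declare void @unknown()

  define void @example(i32* %p) {
    ; The unknown call may access any location; the "not assumed
    ; accessed" set collapses to just the valid bit here.
    call void @unknown()
    ; Previously this store was still inspected; now the walk bails.
    store i32 0, i32* %p
    ret void
  }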
Running only the attributor-cgscc pass on an IR version of
`llvm-test-suite/MultiSource/Applications/SPASS/clause.c` reduced the
time we spend in `AAMemoryLocation::update` from 4% total to 0.9%
(disclaimer: no accurate measurements).
---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp     | 4 +++-
 llvm/test/Transforms/Attributor/dereferenceable-1.ll | 1 +
 llvm/test/Transforms/Attributor/heap_to_stack.ll     | 9 +++++----
 llvm/test/Transforms/Attributor/liveness.ll          | 2 +-
 llvm/test/Transforms/Attributor/noreturn_async.ll    | 2 +-
 5 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 81fc52cb3f1cb0..a1bcec889d1bda 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -6628,7 +6628,9 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
       LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I
                         << ": " << getMemoryLocationsAsStr(MLK) << "\n");
       removeAssumedBits(inverseLocation(MLK, false, false));
-      return true;
+      // Stop once only the valid bit is set in the *not assumed location*,
+      // thus once we don't actually exclude any memory locations in the state.
+      return getAssumedNotAccessedLocation() != VALID_STATE;
     };

     if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
index 9a995396e516ef..3f8fb81a2636b6 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll
@@ -280,6 +280,7 @@ define void @f7_2(i1 %c) {
; CHECK-SAME: (i1 [[C:%.*]])
; CHECK-NEXT: [[PTR:%.*]] = tail call nonnull align 4 dereferenceable(4) i32* @unkown_ptr()
; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]])
+; CHECK-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4
; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]])
; CHECK-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
; CHECK: if.true:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 54e293e73179f2..28c0166dd0cd62 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -450,10 +450,11 @@ define i32 @irreducible_cfg(i32 %0) {
; IS________OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1
; IS________OPM-NEXT: br label [[TMP8]]
; IS________OPM: 15:
-; IS________OPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8*
-; IS________OPM-NEXT: call void @free(i8* nocapture [[TMP16]])
-; IS________OPM-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________OPM-NEXT: ret i32 [[TMP17]]
+; IS________OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
+; IS________OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8*
+; IS________OPM-NEXT: call void @free(i8* nocapture [[TMP17]])
+; IS________OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4
+; IS________OPM-NEXT: ret i32 [[TMP18]]
;
; IS________NPM-LABEL: define {{[^@]+}}@irreducible_cfg
; IS________NPM-SAME: (i32 [[TMP0:%.*]])
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index 57017c50af521a..f3bd7ef1460a85 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@
-1920,7 +1920,7 @@ define i32 @main() { ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[G_0]], 1 ; CHECK-NEXT: br label [[FOR_COND_0]] ; CHECK: for.end.0: -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @malloc(i64 8) +; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i64 8) ; CHECK-NEXT: store i8* [[CALL]], i8** bitcast (%struct.a** @e to i8**), align 8 ; CHECK-NEXT: [[B:%.*]] = bitcast i8* [[CALL]] to %struct.a** ; CHECK-NEXT: store %struct.a* null, %struct.a** [[B]], align 8 diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index 4c0fc203eb095e..879fb16a13d0bf 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes -; RUN: opt -attributor -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s +; RUN: opt -attributor -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=1 -S < %s | FileCheck %s ; ; This file is the same as noreturn_sync.ll but with a personality which ; indicates that the exception handler *can* catch asynchronous exceptions. As From 24c3dabef4436ec6436fb80e0672577ec52159ba Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 17 Aug 2020 18:17:38 -0700 Subject: [PATCH 011/101] DebugInfo: Emit class template parameters first, before members This reads more like what you'd expect the DWARF to look like (from the lexical order of C++ - template parameters come before members, etc), and also happens to make it easier to tickle (& thus test) a bug related to type units and Split DWARF I'm about to fix. --- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 11 +++++------ .../test/DebugInfo/Generic/template-recursive-void.ll | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 3dfd57c82f89a5..11729842ff710a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -891,6 +891,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { } } + // Add template parameters to a class, structure or union types. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + addTemplateParams(Buffer, CTy->getTemplateParams()); + // Add elements to structure type. DINodeArray Elements = CTy->getElements(); for (const auto *Element : Elements) { @@ -960,12 +965,6 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { if (CTy->isObjcClassComplete()) addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); - // Add template parameters to a class, structure or union types. - // FIXME: The support isn't in the metadata for this yet. - if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - addTemplateParams(Buffer, CTy->getTemplateParams()); - // Add the type's non-standard calling convention. 
uint8_t CC = 0; if (CTy->isTypePassByValue()) diff --git a/llvm/test/DebugInfo/Generic/template-recursive-void.ll b/llvm/test/DebugInfo/Generic/template-recursive-void.ll index 0b70f218b3567e..4718b7a1591125 100644 --- a/llvm/test/DebugInfo/Generic/template-recursive-void.ll +++ b/llvm/test/DebugInfo/Generic/template-recursive-void.ll @@ -14,7 +14,7 @@ ; CHECK: DW_TAG_template_type_parameter [{{.*}}] ; CHECK-NEXT: DW_AT_name{{.*}}"T" ; CHECK-NOT: DW_AT_type -; CHECK: NULL +; CHECK: {{DW_TAG|NULL}} source_filename = "test/DebugInfo/Generic/template-recursive-void.ll" From be3ef93bf58aa5546c7baadfb21d43b75fbb4e24 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 17 Aug 2020 21:27:19 -0700 Subject: [PATCH 012/101] PR44685: DebugInfo: Handle address-use-invalid type units referencing non-type units Theory was that we should never reach a non-type unit (eg: type in an anonymous namespace) when we're already in the invalid "encountered an address-use, so stop emitting types for now, until we throw out the whole type tree to restart emitting in non-type unit" state. But that's not the case (prior commit cleaned up one reason this wasn't exposed sooner - but also makes it easier to test/demonstrate this issue) --- llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 1 + llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll | 89 ++++++++++++++++++++ 4 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index f92cf72093ca03..f1edc6c330d51e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -48,7 +48,7 @@ class AddressPool { bool hasBeenUsed() const { return HasBeenUsed; } - void resetUsedFlag() { HasBeenUsed = false; } + void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; } MCSymbol *getLabel() { return AddressTableBaseSym; } void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f70eed32f0b532..cee72120accb79 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3305,14 +3305,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { DD->TypeUnitsUnderConstruction.clear(); - assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed()); + DD->AddrPool.resetUsedFlag(); } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); - DD->AddrPool.resetUsedFlag(); + DD->AddrPool.resetUsedFlag(AddrPoolUsed); } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0b943ebe46b669..93e08d1151ff70 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -648,6 +648,7 @@ class DwarfDebug : public DebugHandlerBase { class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) 
TypeUnitsUnderConstruction; + bool AddrPoolUsed; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: diff --git a/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll new file mode 100644 index 00000000000000..98943b73aefe64 --- /dev/null +++ b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll @@ -0,0 +1,89 @@ +; RUN: llc -filetype=obj -O0 -generate-type-units -split-dwarf-file=x.dwo < %s \ +; RUN: | llvm-dwarfdump -debug-info -debug-types - \ +; RUN: | FileCheck --implicit-check-not=Unit --implicit-check-not=contents --implicit-check-not=declaration %s + +; Test that an address-using-with-Split-DWARF type unit that references a +; non-type unit is handled correctly. A NonTypeUnitContext is used to insulate +; the type construction from being discarded when the prior/outer type has to be +; discarded due to finding it used an address & so can't be type united under +; Split DWARF. + +; The intermediate types tu and t2 are here just to test a bit more +; thoroughly/broadly. They also demonstrate one slight limitation/sub-optimality +; since 't2' isn't put in a type unit. + + +; extern int foo; +; namespace { +; struct t1 { +; }; +; } +; template struct t2 { +; t1 v1; +; }; +; struct t3 { +; t2<&foo> v1; +; }; +; t3 v1; + +; CHECK: .debug_info contents: +; CHECK: Compile Unit: + +; CHECK: .debug_info.dwo contents: +; CHECK: Compile Unit: + +; FIXME: In theory "t3" could be in a type unit - but at the moment, because it +; references t2, which needs an address, t3 gets non-type-united. +; But the same doesn't happen if t3 referenced an anonymous namespace type. + +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t3") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "t2<&foo>" +; CHECK: DW_TAG_namespace +; CHECK: [[T1:0x[0-9a-f]*]]: DW_TAG_structure_type +; CHECK: DW_AT_name ("t1") +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t2<&foo>") +; CHECK: DW_TAG_member +; CHECK: DW_AT_name ("v1") +; CHECK: DW_AT_type ([[T1]] "t1") + +; CHECK: .debug_types contents: + +; CHECK-NOT: .debug_types.dwo contents: + + +%struct.t3 = type { %struct.t2 } +%struct.t2 = type { %"struct.(anonymous namespace)::t1" } +%"struct.(anonymous namespace)::t1" = type { i8 } + +@v1 = dso_local global %struct.t3 zeroinitializer, align 1, !dbg !0 +@foo = external dso_local global i32, align 4 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!18, !19, !20} +!llvm.ident = !{!21} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "v1", scope: !2, file: !3, line: 16, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "test.dwo", emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: GNU) +!3 = !DIFile(filename: "test.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 12, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS2t3") +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !6, file: !3, line: 13, baseType: !9, size: 8) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2<&foo>", file: !3, line: 8, size: 8, flags: DIFlagTypePassByValue, elements: !10, 
templateParams: !14, identifier: "_ZTS2t2IXadL_Z3fooEEE") +!10 = !{!11} +!11 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !9, file: !3, line: 9, baseType: !12, size: 8) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", scope: !13, file: !3, line: 4, size: 8, flags: DIFlagTypePassByValue, elements: !4) +!13 = !DINamespace(scope: null) +!14 = !{!15} +!15 = !DITemplateValueParameter(type: !16, value: i32* @foo) +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !{i32 7, !"Dwarf Version", i32 4} +!19 = !{i32 2, !"Debug Info Version", i32 3} +!20 = !{i32 1, !"wchar_size", i32 4} +!21 = !{!"clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)"} From e33ec9d90400a906314ccbd5821dbe05d070108a Mon Sep 17 00:00:00 2001 From: Nathan Ridge Date: Thu, 12 Mar 2020 19:27:18 -0400 Subject: [PATCH 013/101] [clangd] Target member of dependent base made visible via a using-decl Fixes https://github.com/clangd/clangd/issues/307 Differential Revision: https://reviews.llvm.org/D86047 --- clang-tools-extra/clangd/FindTarget.cpp | 38 ++++++++++------ clang-tools-extra/clangd/XRefs.cpp | 2 +- .../clangd/unittests/FindTargetTests.cpp | 13 ++++++ .../clangd/unittests/XRefsTests.cpp | 44 ++++++++++++------- 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/clang-tools-extra/clangd/FindTarget.cpp b/clang-tools-extra/clangd/FindTarget.cpp index f73a6e58497274..9db814368a024d 100644 --- a/clang-tools-extra/clangd/FindTarget.cpp +++ b/clang-tools-extra/clangd/FindTarget.cpp @@ -100,7 +100,7 @@ CXXRecordDecl *resolveTypeToRecordDecl(const Type *T) { std::vector getMembersReferencedViaDependentName( const Type *T, llvm::function_ref NameFactory, - bool IsNonstaticMember) { + llvm::function_ref Filter) { if (!T) return {}; if (auto *ET = T->getAs()) { @@ -113,17 +113,22 @@ std::vector getMembersReferencedViaDependentName( return {}; RD = RD->getDefinition(); DeclarationName Name = NameFactory(RD->getASTContext()); - return RD->lookupDependentName(Name, [=](const NamedDecl *D) { - return IsNonstaticMember ? D->isCXXInstanceMember() - : !D->isCXXInstanceMember(); - }); + return RD->lookupDependentName(Name, Filter); } return {}; } -// Given the type T of a dependent expression that appears of the LHS of a "->", -// heuristically find a corresponding pointee type in whose scope we could look -// up the name appearing on the RHS. +const auto NonStaticFilter = [](const NamedDecl *D) { + return D->isCXXInstanceMember(); +}; +const auto StaticFilter = [](const NamedDecl *D) { + return !D->isCXXInstanceMember(); +}; +const auto ValueFilter = [](const NamedDecl *D) { return isa(D); }; + +// Given the type T of a dependent expression that appears of the LHS of a +// "->", heuristically find a corresponding pointee type in whose scope we +// could look up the name appearing on the RHS. 
const Type *getPointeeType(const Type *T) { if (!T) return nullptr; @@ -141,7 +146,7 @@ const Type *getPointeeType(const Type *T) { [](ASTContext &Ctx) { return Ctx.DeclarationNames.getCXXOperatorName(OO_Arrow); }, - /*IsNonStaticMember=*/true); + NonStaticFilter); if (ArrowOps.empty()) return nullptr; @@ -187,13 +192,12 @@ std::vector resolveExprToDecls(const Expr *E) { } return getMembersReferencedViaDependentName( BaseType, [ME](ASTContext &) { return ME->getMember(); }, - /*IsNonstaticMember=*/true); + NonStaticFilter); } if (const auto *RE = dyn_cast(E)) { return getMembersReferencedViaDependentName( RE->getQualifier()->getAsType(), - [RE](ASTContext &) { return RE->getDeclName(); }, - /*IsNonstaticMember=*/false); + [RE](ASTContext &) { return RE->getDeclName(); }, StaticFilter); } if (const auto *CE = dyn_cast(E)) { const auto *CalleeType = resolveExprToType(CE->getCallee()); @@ -291,7 +295,6 @@ const NamedDecl *getTemplatePattern(const NamedDecl *D) { // CXXDependentScopeMemberExpr, but some other constructs remain to be handled: // - DependentTemplateSpecializationType, // - DependentNameType -// - UnresolvedUsingValueDecl // - UnresolvedUsingTypenameDecl struct TargetFinder { using RelSet = DeclRelationSet; @@ -345,6 +348,15 @@ struct TargetFinder { } else if (const auto *NAD = dyn_cast(D)) { add(NAD->getUnderlyingDecl(), Flags | Rel::Underlying); Flags |= Rel::Alias; // continue with the alias + } else if (const UnresolvedUsingValueDecl *UUVD = + dyn_cast(D)) { + for (const NamedDecl *Target : getMembersReferencedViaDependentName( + UUVD->getQualifier()->getAsType(), + [UUVD](ASTContext &) { return UUVD->getNameInfo().getName(); }, + ValueFilter)) { + add(Target, Flags | Rel::Underlying); + } + Flags |= Rel::Alias; // continue with the alias } else if (const UsingShadowDecl *USD = dyn_cast(D)) { // Include the using decl, but don't traverse it. This may end up // including *all* shadows, which we don't want. diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 9936c67cb6e5b2..031a9c7bf5da31 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -345,7 +345,7 @@ locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, // Give the underlying decl if navigation is triggered on a non-renaming // alias. - if (llvm::isa(D)) { + if (llvm::isa(D) || llvm::isa(D)) { // FIXME: address more complicated cases. TargetDecl(... Underlying) gives // all overload candidates, we only want the targeted one if the cursor is // on an using-alias usage, workround it with getDeclAtPosition. 
diff --git a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp index 2507932c5cda30..5bfdaaf6c3434c 100644 --- a/clang-tools-extra/clangd/unittests/FindTargetTests.cpp +++ b/clang-tools-extra/clangd/unittests/FindTargetTests.cpp @@ -207,6 +207,19 @@ TEST_F(TargetDeclTest, UsingDecl) { )cpp"; EXPECT_DECLS("MemberExpr", {"using X::foo", Rel::Alias}, {"int foo()", Rel::Underlying}); + + Code = R"cpp( + template + struct Base { + void waldo() {} + }; + template + struct Derived : Base { + using Base::[[waldo]]; + }; + )cpp"; + EXPECT_DECLS("UnresolvedUsingValueDecl", {"using Base::waldo", Rel::Alias}, + {"void waldo()", Rel::Underlying}); } TEST_F(TargetDeclTest, ConstructorInitList) { diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 63e8c96daab842..d2337dcbd7b318 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -1087,66 +1087,78 @@ TEST(LocateSymbol, TextualDependent) { TEST(LocateSymbol, Alias) { const char *Tests[] = { - R"cpp( + R"cpp( template struct function {}; template using [[callback]] = function; c^allback foo; )cpp", - // triggered on non-definition of a renaming alias: should not give any - // underlying decls. - R"cpp( + // triggered on non-definition of a renaming alias: should not give any + // underlying decls. + R"cpp( class Foo {}; typedef Foo [[Bar]]; B^ar b; )cpp", - R"cpp( + R"cpp( class Foo {}; using [[Bar]] = Foo; // definition Ba^r b; )cpp", - // triggered on the underlying decl of a renaming alias. - R"cpp( + // triggered on the underlying decl of a renaming alias. + R"cpp( class [[Foo]]; using Bar = Fo^o; )cpp", - // triggered on definition of a non-renaming alias: should give underlying - // decls. - R"cpp( + // triggered on definition of a non-renaming alias: should give underlying + // decls. + R"cpp( namespace ns { class [[Foo]] {}; } using ns::F^oo; )cpp", - R"cpp( + R"cpp( namespace ns { int [[x]](char); int [[x]](double); } using ns::^x; )cpp", - R"cpp( + R"cpp( namespace ns { int [[x]](char); int x(double); } using ns::x; int y = ^x('a'); )cpp", - R"cpp( + R"cpp( namespace ns { class [[Foo]] {}; } using ns::Foo; F^oo f; )cpp", - // other cases that don't matter much. - R"cpp( + // other cases that don't matter much. + R"cpp( class Foo {}; typedef Foo [[Ba^r]]; )cpp", - R"cpp( + R"cpp( class Foo {}; using [[B^ar]] = Foo; )cpp", + + // Member of dependent base + R"cpp( + template + struct Base { + void [[waldo]]() {} + }; + template + struct Derived : Base { + using Base::w^aldo; + }; + )cpp", }; for (const auto* Case : Tests) { From a4b8c2de1d393525f5333d24999031b25d0e8862 Mon Sep 17 00:00:00 2001 From: Jakub Lichman Date: Tue, 18 Aug 2020 07:11:52 +0000 Subject: [PATCH 014/101] [mlir] VectorToSCF bug in setAllocAtFunctionEntry fixed. The function makes too strong assumption regarding parent FuncOp which gets broken when FuncOp is first lowered to llvm function. In this fix we generalize the assumption to allocation scope and add assertion to produce user friendly message in case our assumption is broken. 
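To illustrate the failure mode with a minimal C++ sketch (the identifiers
below follow the patch; the surrounding-IR scenario is our assumption): once
the enclosing func.func has been rewritten into an llvm.func, looking up a
parent of type FuncOp finds nothing, so taking .front() of its region
crashes, while walking up to the nearest automatic allocation scope still
succeeds, assuming the LLVM dialect function op provides such a scope too:

  // `op` is assumed to sit inside a function already lowered to the LLVM
  // dialect, so there is no FuncOp ancestor anymore.
  FuncOp func = op->getParentOfType<FuncOp>();   // yields a null op
  Operation *scope =
      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
  assert(scope && "Expected op to be inside automatic allocation scope");
  // Allocas belong at the start of the closest allocation scope's region.
  b.setInsertionPointToStart(&scope->getRegion(0).front());
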
Differential Revision: https://reviews.llvm.org/D86086
---
 mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index ea368c9eb14e05..95208ad231c91d 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -224,7 +224,10 @@ static Value setAllocAtFunctionEntry(MemRefType memRefMinorVectorType,
                                      Operation *op) {
   auto &b = ScopedContext::getBuilderRef();
   OpBuilder::InsertionGuard guard(b);
-  b.setInsertionPointToStart(&op->getParentOfType<FuncOp>().front());
+  Operation *scope =
+      op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+  assert(scope && "Expected op to be inside automatic allocation scope");
+  b.setInsertionPointToStart(&scope->getRegion(0).front());
   Value res =
       std_alloca(memRefMinorVectorType, ValueRange{}, b.getI64IntegerAttr(128));
   return res;

From 674f2df4fe0b6af901fc7c7e8bd3fb37e1e8516c Mon Sep 17 00:00:00 2001
From: Alex Zinenko
Date: Mon, 17 Aug 2020 20:25:28 +0200
Subject: [PATCH 015/101] [mlir] Fix printing of unranked memrefs in
 non-default memory space

The type printer was ignoring the memory space on unranked memrefs.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D86096
---
 mlir/lib/IR/AsmPrinter.cpp    | 3 +++
 mlir/test/IR/core-ops.mlir    | 5 +++++
 mlir/test/IR/invalid-ops.mlir | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index c8b4a864fb63a0..61eecb81108504 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -1650,6 +1650,9 @@ void ModulePrinter::printType(Type type) {
       .Case([&](UnrankedMemRefType memrefTy) {
         os << "memref<*x";
         printType(memrefTy.getElementType());
+        // Only print the memory space if it is the non-default one.
+        if (memrefTy.getMemorySpace())
+          os << ", " << memrefTy.getMemorySpace();
         os << '>';
       })
       .Case([&](ComplexType complexTy) {
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
index 89bcd75ffa2a84..74470719047791 100644
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -703,6 +703,11 @@ func @memref_cast(%arg0: memref<4xf32>, %arg1 : memref<?xf32>, %arg2 : memref<64
   return
 }
 
+// Check that unranked memrefs with non-default memory space roundtrip
+// properly.
+// CHECK-LABEL: @unranked_memref_roundtrip(memref<*xf32, 4>)
+func @unranked_memref_roundtrip(memref<*xf32, 4>)
+
 // CHECK-LABEL: func @memref_view(%arg0
 func @memref_view(%arg0 : index, %arg1 : index, %arg2 : index) {
   %0 = alloc() : memref<2048xi8>
diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index 6302a8a4acbf92..55739119aa26d6 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -1076,7 +1076,7 @@ func @invalid_prefetch_locality_hint(%i : index) {
 // incompatible memory space
 func @invalid_memref_cast() {
   %0 = alloc() : memref<2x5xf32, 0>
-  // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xf32>' are cast incompatible}}
+  // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xf32, 1>' are cast incompatible}}
   %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xf32, 1>
   return
 }

From b475eca1ed8b57bc7457c92b837f93db710c38bc Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Mon, 17 Aug 2020 17:58:14 +0300
Subject: [PATCH 016/101] [llvm-readobj/elf] - Merge mips-got-overlapped.test
 into mips-got.test and refine testing.
The `mips-got-overlapped.test` was introduced in D16968 and its intention is
to check that when there is an empty section at the same address as `.got`,
we are able to locate `.got` and dump it.

The issue is that this test does not test llvm-readelf and uses a precompiled
object. This patch starts using YAML instead and merges
mips-got-overlapped.test into mips-got.test.

Differential revision: https://reviews.llvm.org/D86080
---
 .../ELF/Inputs/got-over.exe.elf-mips          | Bin 1648 -> 0 bytes
 .../llvm-readobj/ELF/mips-got-overlapped.test | 45 -----------
 .../test/tools/llvm-readobj/ELF/mips-got.test | 72 ++++++++++++++++++
 3 files changed, 72 insertions(+), 45 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/mips-got-overlapped.test

diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips b/llvm/test/tools/llvm-readobj/ELF/Inputs/got-over.exe.elf-mips
deleted file mode 100644
index 27644bff3302a4ceaad06cadb9f0b4eb0dc6ec0e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1648
[base85 binary data omitted]

 | FileCheck %s -DFILE=%t.err7.o --check-prefix=NAME-ERR-NOTFOUND --implicit-check-not=warning:
 # NAME-ERR-NOTFOUND: warning: '[[FILE]]': unable to read the name of SHT_PROGBITS section with index 2: a section [index 2] has an invalid sh_name (0xffff) offset which goes past the end of the section name string table
+
+## Check that we correctly show .got section content when there are some other zero-sized
+## sections with the same address as the .got section.
+## In this test the empty .data section has the same address as the .got section.
+
+# RUN: yaml2obj --docnum=4 %s -o %t.err7.o
+# RUN: llvm-readobj -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-LLVM
+# RUN: llvm-readelf -A %t.err7.o 2>&1 | FileCheck %s -DFILE=%t.err7.o --check-prefix=SAME-ADDR-GNU
+
+# SAME-ADDR-LLVM: Primary GOT {
+# SAME-ADDR-LLVM-NEXT:   Canonical gp value: 0x9112
+# SAME-ADDR-LLVM-NEXT:   Reserved entries [
+# SAME-ADDR-LLVM-NEXT:     Entry {
+# SAME-ADDR-LLVM-NEXT:       Address: 0x1122
+# SAME-ADDR-LLVM-NEXT:       Access: -32752
+# SAME-ADDR-LLVM-NEXT:       Initial: 0x0
+# SAME-ADDR-LLVM-NEXT:       Purpose: Lazy resolver
+# SAME-ADDR-LLVM-NEXT:     }
+# SAME-ADDR-LLVM-NEXT:   ]
+# SAME-ADDR-LLVM-NEXT:   Local entries [
+# SAME-ADDR-LLVM-NEXT:   ]
+# SAME-ADDR-LLVM-NEXT:   Global entries [
+# SAME-ADDR-LLVM-NEXT:     Entry {
+# SAME-ADDR-LLVM-NEXT:       Address: 0x112A
+# SAME-ADDR-LLVM-NEXT:       Access: -32744
+# SAME-ADDR-LLVM-NEXT:       Initial: 0x0
+# SAME-ADDR-LLVM-NEXT:       Value: 0x0
+# SAME-ADDR-LLVM-NEXT:       Type: None (0x0)
+# SAME-ADDR-LLVM-NEXT:       Section: Undefined (0x0)
+# SAME-ADDR-LLVM-NEXT:       Name: foo (1)
+# SAME-ADDR-LLVM-NEXT:     }
+# SAME-ADDR-LLVM-NEXT:   ]
+# SAME-ADDR-LLVM-NEXT:   Number of TLS and multi-GOT entries: 0
+# SAME-ADDR-LLVM-NEXT: }
+
+# SAME-ADDR-GNU: Primary GOT:
+# SAME-ADDR-GNU-NEXT:  Canonical gp value: 0000000000009112
+# SAME-ADDR-GNU-EMPTY:
+# SAME-ADDR-GNU-NEXT:  Reserved entries:
+# SAME-ADDR-GNU-NEXT:    Address     Access  Initial Purpose
+# SAME-ADDR-GNU-NEXT:    0000000000001122 -32752(gp) 0000000000000000 Lazy resolver
+# SAME-ADDR-GNU-EMPTY:
+# SAME-ADDR-GNU-NEXT:  Global entries:
+# SAME-ADDR-GNU-NEXT:    Address     Access  Initial Sym.Val. Type Ndx Name
+# SAME-ADDR-GNU-NEXT:    000000000000112a -32744(gp) 0000000000000000 0000000000000000 NOTYPE UND foo
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_MIPS
+Sections:
+  - Name:    .data
+    Type:    SHT_PROGBITS
+    Address: 0x1122
+    Size:    0
+  - Name:    .got
+    Type:    SHT_PROGBITS
+    Address: 0x1122
+    Size:    16
+  - Name:    .dynamic
+    Type:    SHT_DYNAMIC
+    Entries:
+      - Tag:   DT_MIPS_LOCAL_GOTNO
+        Value: 1
+      - Tag:   DT_MIPS_GOTSYM
+        Value: 1
+      - Tag:   DT_PLTGOT
+        Value: 0x1122
+DynamicSymbols:
+  - Name: foo

From 6786b3e307175a2e26b88c161c4a7ed999ef2185 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Mon, 17 Aug 2020 16:38:56 +0300
Subject: [PATCH 017/101] [llvm-readobj/elf] - Refine the
 malformed-pt-dynamic.test.

This is split out from D85519, but significantly reworked.

Changes:
1) This test was changed to stop using Python.
2) Use NoHeaders: true instead of `llvm-objcopy --strip-sections`.
3) Test llvm-readelf too (not just llvm-readobj).
4) Simplify the YAML used a bit (e.g. remove PT_LOAD).
5) Test 2 different cases: objects with a section header table and without.

Differential revision: https://reviews.llvm.org/D86073
---
 .../ELF/malformed-pt-dynamic.test             | 88 ++++++++++++-------
 1 file changed, 56 insertions(+), 32 deletions(-)

diff --git a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
index 3ffdd57486a0e0..d73f55b5fe6a57 100644
--- a/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
+++ b/llvm/test/tools/llvm-readobj/ELF/malformed-pt-dynamic.test
@@ -1,51 +1,75 @@
-# If the offset and/or size fields of the PT_DYNAMIC field become corrupted,
-# we should report a sensible message.
+## If the offset and/or size fields of the PT_DYNAMIC field become corrupted,
+## we should report a sensible message.
 
-# Creating such a malformed file is hard. The easiest way to simulate it is to
-# truncate the file.
Note that the section headers must first be stripped or -# llvm-readobj will fail to parse the file due to the section header table -# offset pointing outside the file. +## Case A: Test case where the size of the PT_DYNAMIC header is too large to fit in the file, +## but the start is within the file. -# RUN: yaml2obj %s -o %t.base -# RUN: llvm-objcopy --strip-sections %t.base %t.stripped +## Case A.1: the section header table is present in the object. Check that we report a warning about the +## broken PT_DYNAMIC header, check we dump the dynamic table. +# RUN: yaml2obj %s -DFILESIZE=0x131 -o %t1 +# RUN: llvm-readobj %t1 --dynamic-table 2>&1 | FileCheck -DFILE=%t1 %s --check-prefixes=WARN1,WARN1-LLVM +# RUN: llvm-readelf %t1 --dynamic-table 2>&1 | FileCheck -DFILE=%t1 %s --check-prefixes=WARN1,WARN1-GNU -# Test case where the size is too large to fit in the file, but the start is -# within the file. -# RUN: cp %t.stripped %t.truncated1 -# RUN: %python -c "with open(r'%t.truncated1', 'r+') as f: f.truncate(0x1001)" -# RUN: llvm-readobj %t.truncated1 --dynamic-table 2>&1 | \ -# RUN: FileCheck -DFILE=%t.truncated1 %s --check-prefix=WARN1 +# WARN1: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x131) exceeds the size of the file (0x1130) -# WARN1: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0x1001) +# WARN1-LLVM: DynamicSection [ (1 entries) +# WARN1-LLVM-NEXT: Tag Type Name/Value +# WARN1-LLVM-NEXT: 0x0000000000000000 NULL 0x0 +# WARN1-LLVM-NEXT: ] -# Test case where the offset is too large to be in the file. -# RUN: cp %t.stripped %t.truncated2 -# RUN: %python -c "with open(r'%t.truncated2', 'r+') as f: f.truncate(0xFFF)" -# RUN: llvm-readobj %t.truncated2 --dynamic-table 2>&1 | \ -# RUN: FileCheck -DFILE=%t.truncated2 %s --check-prefix=WARN2 +# WARN1-GNU: Dynamic section at offset 0x1000 contains 1 entries: +# WARN1-GNU-NEXT: Tag Type Name/Value +# WARN1-GNU-NEXT: 0x0000000000000000 (NULL) 0x0 -# WARN2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x10) exceeds the size of the file (0xfff) +## Case A.2: in this case we drop section headers. The dynamic table is not dumped. +# RUN: yaml2obj %s -DFILESIZE=0x119 -DNOHEADERS=true -o %t1.noheaders +# RUN: llvm-readobj %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ +# RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="DynamicSection [" +# RUN: llvm-readelf %t1.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t1.noheaders %s \ +# RUN: --check-prefix=WARN1-NOHEADERS --implicit-check-not="Dynamic section" + +# WARN1-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1000) + file size (0x119) exceeds the size of the file (0x1118) + +## Case B: Test case where the offset of the PT_DYNAMIC header is too large to be in the file. + +## Case B.1: the section header table is present in the object. Check that we report a warning about the +## broken PT_DYNAMIC header, but document that we do not dump the dynamic table, because +## return an error earlier. 
+
+# RUN: yaml2obj %s -DOFFSET=0x1131 -o %t2
+# RUN: not llvm-readobj %t2 --dynamic-table 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=WARN2
+# RUN: not llvm-readelf %t2 --dynamic-table 2>&1 | FileCheck -DFILE=%t2 %s --check-prefix=WARN2
+
+# WARN2: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1131) + file size (0x10) exceeds the size of the file (0x1130)
+# WARN2: error: '[[FILE]]': Invalid data was encountered while parsing the file
+
+## Case B.2: in this case we drop section headers. The dynamic table is not dumped.
+# RUN: yaml2obj %s -DOFFSET=0x1119 -DNOHEADERS=true -o %t2.noheaders
+# RUN: llvm-readobj %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \
+# RUN:   --check-prefix=WARN2-NOHEADERS --implicit-check-not="DynamicSection ["
+# RUN: llvm-readelf %t2.noheaders --dynamic-table 2>&1 | FileCheck -DFILE=%t2.noheaders %s \
+# RUN:   --check-prefix=WARN2-NOHEADERS --implicit-check-not="Dynamic section"
+
+# WARN2-NOHEADERS: warning: '[[FILE]]': PT_DYNAMIC segment offset (0x1119) + file size (0x10) exceeds the size of the file (0x1118)
 
 --- !ELF
 FileHeader:
   Class: ELFCLASS64
   Data: ELFDATA2LSB
   Type: ET_EXEC
-  Machine: EM_X86_64
+  Machine: EM_NONE
 Sections:
-  - Name: .dynamic
-    Type: SHT_DYNAMIC
-    Address: 0x1000
-    AddressAlign: 0x1000
+  - Name: .dynamic
+    Type: SHT_DYNAMIC
+    Address: 0x1000
+    Offset: 0x1000
+    ShOffset: [[OFFSET=]]
     Entries:
       - Tag: DT_NULL
         Value: 0
 ProgramHeaders:
-  - Type: PT_LOAD
-    VAddr: 0x1000
-    Sections:
-      - Section: .dynamic
-  - Type: PT_DYNAMIC
-    VAddr: 0x1000
+  - Type: PT_DYNAMIC
+    FileSize: [[FILESIZE=]]
     Sections:
       - Section: .dynamic
+SectionHeaderTable:
+  NoHeaders: [[NOHEADERS=false]]

From 5e361e2aa4f602a6b71d241bf4bc1013d25c3bef Mon Sep 17 00:00:00 2001
From: Shinji Okumura
Date: Tue, 18 Aug 2020 18:04:47 +0900
Subject: [PATCH 018/101] [Attributor] Deduce noundef attribute

This patch introduces a new abstract attribute `AANoUndef`, which corresponds
to the `noundef` IR attribute, and deduces it.
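As a rough usage sketch (ours, not part of the patch), other abstract
attributes can consume the deduced state through the usual Attributor
lookup; the attribute name, the position helper, and the two getters come
from this patch, while the wrapper function itself is hypothetical:

  // Hypothetical helper: true if `V` cannot currently be assumed free of
  // undef/poison at its value position.
  static bool mayBeUndefOrPoison(Attributor &A,
                                 const AbstractAttribute &QueryingAA,
                                 Value &V) {
    const auto &NoUndefAA =
        A.getAAFor<AANoUndef>(QueryingAA, IRPosition::value(V));
    // isAssumedNoUndef() reflects the optimistic fixpoint state;
    // isKnownNoUndef() only holds once the property has been proven.
    return !NoUndefAA.isAssumedNoUndef();
  }
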
Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D85184
---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  30 +
 llvm/lib/Transforms/IPO/Attributor.cpp        |   9 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 116 ++++
 .../2008-02-01-ReturnAttrs.ll                 |   8 +-
 .../ArgumentPromotion/X86/attributes.ll       |  40 +-
 .../X86/min-legal-vector-width.ll             | 160 ++---
 .../ArgumentPromotion/X86/thiscall.ll         |  24 +-
 .../Attributor/ArgumentPromotion/alignment.ll |  22 +-
 .../Attributor/ArgumentPromotion/attrs.ll     |   8 +-
 .../Attributor/ArgumentPromotion/basictest.ll |  16 +-
 .../Attributor/ArgumentPromotion/byval-2.ll   |   2 +-
 .../Attributor/ArgumentPromotion/byval.ll     |  16 +-
 .../ArgumentPromotion/control-flow2.ll        |   8 +-
 .../Attributor/ArgumentPromotion/inalloca.ll  |  10 +-
 .../live_called_from_dead.ll                  |   8 +-
 .../live_called_from_dead_2.ll                |  22 +-
 .../ArgumentPromotion/naked_functions.ll      |   2 +-
 .../Attributor/ArgumentPromotion/profile.ll   |   6 +-
 .../ArgumentPromotion/reserve-tbaa.ll         |   4 +-
 .../Attributor/ArgumentPromotion/sret.ll      |  14 +-
 .../Attributor/ArgumentPromotion/tail.ll      |   2 +-
 .../Attributor/ArgumentPromotion/variadic.ll  |   2 +-
 .../IPConstantProp/2009-09-24-byval-ptr.ll    |  14 +-
 .../IPConstantProp/multiple_callbacks.ll      |   8 +-
 .../IPConstantProp/openmp_parallel_for.ll     |  20 +-
 .../Attributor/IPConstantProp/pthreads.ll     |  32 +-
 .../IPConstantProp/return-argument.ll         |   8 +-
 .../IPConstantProp/thread_local_acs.ll        |   8 +-
 llvm/test/Transforms/Attributor/align.ll      |  44 +-
 llvm/test/Transforms/Attributor/callbacks.ll  |  56 +-
 llvm/test/Transforms/Attributor/depgraph.ll   |   1 +
 .../Attributor/dereferenceable-1.ll           |  98 ++-
 .../Transforms/Attributor/heap_to_stack.ll    | 610 ++++++++++++------
 .../Transforms/Attributor/internal-noalias.ll |  24 +-
 llvm/test/Transforms/Attributor/liveness.ll   |  28 +-
 .../Transforms/Attributor/memory_locations.ll |  54 +-
 llvm/test/Transforms/Attributor/misc.ll       |  12 +-
 llvm/test/Transforms/Attributor/misc_crash.ll |   8 +-
 llvm/test/Transforms/Attributor/noalias.ll    | 153 +++--
 .../test/Transforms/Attributor/nocapture-1.ll |   4 +-
 .../test/Transforms/Attributor/nocapture-2.ll |  18 +-
 llvm/test/Transforms/Attributor/nonnull.ll    |  16 +-
 .../Transforms/Attributor/noreturn_async.ll   |   2 +-
 .../Transforms/Attributor/noreturn_sync.ll    |   2 +-
 llvm/test/Transforms/Attributor/nosync.ll     |   2 +-
 llvm/test/Transforms/Attributor/noundef.ll    |  22 +
 .../read_write_returned_arguments_scc.ll      |  20 +-
 llvm/test/Transforms/Attributor/readattrs.ll  |   4 +-
 llvm/test/Transforms/Attributor/returned.ll   |  40 +-
 .../Attributor/undefined_behavior.ll          |  56 +-
 .../Transforms/Attributor/value-simplify.ll   |  39 +-
 .../Transforms/OpenMP/parallel_deletion.ll    |  24 +-
 52 files changed, 1214 insertions(+), 742 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/noundef.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index b6c0a17fc3e7fa..73e25417452cf8 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -3560,6 +3560,36 @@ struct AAPotentialValues
   static const char ID;
 };
 
+/// An abstract interface for all noundef attributes.
+struct AANoUndef
+    : public IRAttribute<Attribute::NoUndef,
+                         StateWrapper<BooleanState, AbstractAttribute>> {
+  AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+
+  /// Return true if we assume that the underlying value is noundef.
+  bool isAssumedNoUndef() const { return getAssumed(); }
+
+  /// Return true if we know that underlying value is noundef.
+ bool isKnownNoUndef() const { return getKnown(); } + + /// Create an abstract attribute view for the position \p IRP. + static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A); + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AANoUndef"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANoUndef + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + /// Run options, used by the pass manager. enum AttributorRunOption { NONE = 0, diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 54ec7174f9fb8a..6cd3e059c3a19c 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1948,6 +1948,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function with pointer return type might be marked // dereferenceable. getOrCreateAAFor(RetPos); + + // Every function with pointer return type might be marked noundef. + getOrCreateAAFor(RetPos); } } @@ -1985,6 +1988,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every argument with pointer type might be privatizable (or promotable) getOrCreateAAFor(ArgPos); + + // Every argument with pointer type might be marked noundef. + getOrCreateAAFor(ArgPos); } } @@ -2051,6 +2057,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Call site argument attribute "nofree". getOrCreateAAFor(CBArgPos); + + // Call site argument attribute "noundef". + getOrCreateAAFor(CBArgPos); } return true; }; diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index a1bcec889d1bda..721b8814542399 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -131,6 +131,7 @@ PIPE_OPERATOR(AAValueConstantRange) PIPE_OPERATOR(AAPrivatizablePtr) PIPE_OPERATOR(AAUndefinedBehavior) PIPE_OPERATOR(AAPotentialValues) +PIPE_OPERATOR(AANoUndef) #undef PIPE_OPERATOR } // namespace llvm @@ -7625,6 +7626,119 @@ struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating { } }; +/// ------------------------ NoUndef Attribute --------------------------------- +struct AANoUndefImpl : AANoUndef { + AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + Value &V = getAssociatedValue(); + if (isa(V)) + indicatePessimisticFixpoint(); + else if (isa(V)) + indicateOptimisticFixpoint(); + else if (isGuaranteedNotToBeUndefOrPoison(&V)) + indicateOptimisticFixpoint(); + else + AANoUndef::initialize(A); + } + + /// See followUsesInMBEC + bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I, + AANoUndef::StateType &State) { + const Value *UseV = U->get(); + const DominatorTree *DT = nullptr; + if (Function *F = getAnchorScope()) + DT = A.getInfoCache().getAnalysisResultForFunction( + *F); + State.setKnown(isGuaranteedNotToBeUndefOrPoison(UseV, I, DT)); + bool TrackUse = false; + // Track use for instructions which must produce undef or poison bits when + // at least one operand contains such bits. + if (isa(*I) || isa(*I)) + TrackUse = true; + return TrackUse; + } + + /// See AbstractAttribute::getAsStr(). 
+ const std::string getAsStr() const override { + return getAssumed() ? "noundef" : "may-undef-or-poison"; + } +}; + +struct AANoUndefFloating : public AANoUndefImpl { + AANoUndefFloating(const IRPosition &IRP, Attributor &A) + : AANoUndefImpl(IRP, A) {} + + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + AANoUndefImpl::initialize(A); + if (!getState().isAtFixpoint()) + if (Instruction *CtxI = getCtxI()) + followUsesInMBEC(*this, A, getState(), *CtxI); + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + auto VisitValueCB = [&](Value &V, const Instruction *CtxI, + AANoUndef::StateType &T, bool Stripped) -> bool { + const auto &AA = A.getAAFor(*this, IRPosition::value(V)); + if (!Stripped && this == &AA) { + T.indicatePessimisticFixpoint(); + } else { + const AANoUndef::StateType &S = + static_cast(AA.getState()); + T ^= S; + } + return T.isValidState(); + }; + + StateType T; + if (!genericValueTraversal( + A, getIRPosition(), *this, T, VisitValueCB, getCtxI())) + return indicatePessimisticFixpoint(); + + return clampStateAndIndicateChange(getState(), T); + } + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) } +}; + +struct AANoUndefReturned final + : AAReturnedFromReturnedValues { + AANoUndefReturned(const IRPosition &IRP, Attributor &A) + : AAReturnedFromReturnedValues(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) } +}; + +struct AANoUndefArgument final + : AAArgumentFromCallSiteArguments { + AANoUndefArgument(const IRPosition &IRP, Attributor &A) + : AAArgumentFromCallSiteArguments(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) } +}; + +struct AANoUndefCallSiteArgument final : AANoUndefFloating { + AANoUndefCallSiteArgument(const IRPosition &IRP, Attributor &A) + : AANoUndefFloating(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noundef) } +}; + +struct AANoUndefCallSiteReturned final + : AACallSiteReturnedFromReturned { + AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A) + : AACallSiteReturnedFromReturned(IRP, A) {} + + /// See AbstractAttribute::trackStatistics() + void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) } +}; } // namespace const char AAReturnedValues::ID = 0; @@ -7649,6 +7763,7 @@ const char AAMemoryBehavior::ID = 0; const char AAMemoryLocation::ID = 0; const char AAValueConstantRange::ID = 0; const char AAPotentialValues::ID = 0; +const char AANoUndef::ID = 0; // Macro magic to create the static generator function for attributes that // follow the naming scheme. 
@@ -7759,6 +7874,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange) CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues) +CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef) CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify) CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead) diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll index 639772d553f6ab..1a95bdb9ce351a 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -7,7 +7,7 @@ define internal i32 @deref(i32* %x) nounwind { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@deref -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP2]] @@ -23,7 +23,7 @@ define internal i32 @deref(i32* %x) nounwind { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@deref -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC____-NEXT: ret i32 [[TMP2]] @@ -40,7 +40,7 @@ define i32 @f(i32 %x) { ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -59,7 +59,7 @@ define i32 @f(i32 %x) { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 [[X]], i32* [[X_ADDR]], align 4 -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X_ADDR]]) ; IS__CGSCC____-NEXT: ret i32 [[TMP1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll index db349295a54d4a..c5affd398d0cdf 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -10,14 +10,14 @@ target triple = "x86_64-unknown-linux-gnu" define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; 
NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@no_promote_avx2 -; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; NOT_TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@no_promote_avx2 -; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 @@ -36,8 +36,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -48,8 +48,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 
dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -60,8 +60,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_OPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_OPM-NEXT: ret void @@ -72,8 +72,8 @@ define void @no_promote(<4 x i64>* %arg) #1 { ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__CGSCC_NPM-NEXT: call fastcc void @no_promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void @@ -91,14 +91,14 @@ bb: define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 { ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) +; NOT_TUNIT_NPM-SAME: (<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]]) ; NOT_TUNIT_NPM-NEXT: bb: ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32 ; NOT_TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* 
[[ARG]], align 32 ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) +; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) ; IS__TUNIT_NPM-NEXT: bb: ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 @@ -119,8 +119,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__TUNIT_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]]) ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_OPM-NEXT: ret void @@ -131,9 +131,9 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) +; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 -; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) +; IS__TUNIT_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 ; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 ; IS__TUNIT_NPM-NEXT: ret void @@ -144,8 +144,8 @@ define void @promote(<4 x i64>* %arg) #0 { ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8* -; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false) -; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture 
nofree nonnull readonly align 32 dereferenceable(32) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; IS__CGSCC_OPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -156,8 +156,8 @@ define void @promote(<4 x i64>* %arg) #0 {
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <4 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree noundef nonnull readonly align 32 dereferenceable(32) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 4274e3c89111c1..6fa2d588382e18 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -12,14 +12,14 @@ target triple = "x86_64-unknown-linux-gnu"
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -41,8 +41,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -53,9 +53,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -66,8 +66,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -78,8 +78,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -99,14 +99,14 @@ bb:
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -128,8 +128,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -140,9 +140,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -153,8 +153,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -165,8 +165,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -186,14 +186,14 @@ bb:
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -215,8 +215,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -227,9 +227,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -240,8 +240,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -252,8 +252,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -273,14 +273,14 @@ bb:
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -302,8 +302,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -314,9 +314,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -327,8 +327,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -339,8 +339,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -360,14 +360,14 @@ bb:
 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
@@ -387,8 +387,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -399,8 +399,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -411,8 +411,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -423,8 +423,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -444,14 +444,14 @@ bb:
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
@@ -471,8 +471,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -483,8 +483,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -495,8 +495,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -507,8 +507,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>*
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -528,14 +528,14 @@ bb:
 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -557,8 +557,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -569,9 +569,9 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -582,8 +582,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -594,8 +594,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
@@ -615,14 +615,14 @@ bb:
 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
 ;
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
+; NOT_TUNIT_NPM-SAME: (<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]])
 ; NOT_TUNIT_NPM-NEXT: bb:
 ; NOT_TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64
 ; NOT_TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
+; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; IS__TUNIT_NPM-NEXT: bb:
 ; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64
@@ -644,8 +644,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__TUNIT_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -656,9 +656,9 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__TUNIT_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__TUNIT_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
 ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64
-; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
+; IS__TUNIT_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; IS__TUNIT_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__TUNIT_NPM-NEXT: ret void
@@ -669,8 +669,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_OPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_OPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_OPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_OPM-NEXT: ret void
@@ -681,8 +681,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
-; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 64 dereferenceable(64) [[TMP]])
+; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
+; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]])
 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64
 ; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; IS__CGSCC_NPM-NEXT: ret void
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
index fa289c15cacd5e..f96cc52e9837af 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/thiscall.ll
@@ -16,23 +16,23 @@ target triple = "i386-pc-windows-msvc19.11.0"
 define internal x86_thiscallcc void @internalfun(%struct.a* %this, <{ %struct.a }>* inalloca) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@internalfun
-; IS__TUNIT____-SAME: (%struct.a* noalias nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]])
+; IS__TUNIT____-SAME: (%struct.a* noalias nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[TMP0:%.*]])
 ; IS__TUNIT____-NEXT: entry:
 ; IS__TUNIT____-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
 ; IS__TUNIT____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
-; IS__TUNIT____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]])
-; IS__TUNIT____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]])
+; IS__TUNIT____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* noundef nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* noundef nonnull align 4 dereferenceable(1) [[A]])
+; IS__TUNIT____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]])
 ; IS__TUNIT____-NEXT: ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internalfun
-; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca nonnull align 4 dereferenceable(1) [[TMP0:%.*]])
+; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[THIS:%.*]], <{ [[STRUCT_A:%.*]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[TMP0:%.*]])
 ; IS__CGSCC____-NEXT: entry:
 ; IS__CGSCC____-NEXT: [[A:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[TMP0]], i32 0, i32 0
 ; IS__CGSCC____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A]] }>, align 4
 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = getelementptr inbounds <{ [[STRUCT_A]] }>, <{ [[STRUCT_A]] }>* [[ARGMEM]], i32 0, i32 0
-; IS__CGSCC____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* nonnull align 4 dereferenceable(1) [[A]])
-; IS__CGSCC____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]])
+; IS__CGSCC____-NEXT: [[CALL:%.*]] = call x86_thiscallcc %struct.a* @copy_ctor(%struct.a* noundef nonnull align 4 dereferenceable(1) [[TMP1]], %struct.a* noundef nonnull align 4 dereferenceable(1) [[A]])
+; IS__CGSCC____-NEXT: call void @ext(<{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]])
 ; IS__CGSCC____-NEXT: ret void
 ;
 entry:
@@ -48,18 +48,18 @@ entry:
 define void @exportedfun(%struct.a* %a) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@exportedfun
 ; IS__TUNIT____-SAME: (%struct.a* nocapture nofree readnone [[A:%.*]])
-; IS__TUNIT____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; IS__TUNIT____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call noundef i8* @llvm.stacksave()
 ; IS__TUNIT____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
-; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone undef, <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]])
-; IS__TUNIT____-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; IS__TUNIT____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone undef, <{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]])
+; IS__TUNIT____-NEXT: call void @llvm.stackrestore(i8* noundef [[INALLOCA_SAVE]])
 ; IS__TUNIT____-NEXT: ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@exportedfun
 ; IS__CGSCC____-SAME: (%struct.a* nocapture nofree readnone [[A:%.*]])
-; IS__CGSCC____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call i8* @llvm.stacksave()
+; IS__CGSCC____-NEXT: [[INALLOCA_SAVE:%.*]] = tail call noundef i8* @llvm.stacksave()
 ; IS__CGSCC____-NEXT: [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_A:%.*]] }>, align 4
-; IS__CGSCC____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone [[A]], <{ [[STRUCT_A]] }>* inalloca nonnull align 4 dereferenceable(1) [[ARGMEM]])
-; IS__CGSCC____-NEXT: call void @llvm.stackrestore(i8* [[INALLOCA_SAVE]])
+; IS__CGSCC____-NEXT: call x86_thiscallcc void @internalfun(%struct.a* noalias nocapture nofree readnone [[A]], <{ [[STRUCT_A]] }>* inalloca noundef nonnull align 4 dereferenceable(1) [[ARGMEM]])
+; IS__CGSCC____-NEXT: call void @llvm.stackrestore(i8* noundef [[INALLOCA_SAVE]])
 ; IS__CGSCC____-NEXT: ret void
 ;
 %inalloca.save = tail call i8* @llvm.stacksave()
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
index 33cc4975d59608..59c590abe9e932 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -8,7 +8,7 @@ define void @f() {
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@f()
 ; NOT_TUNIT_NPM-NEXT: entry:
 ; NOT_TUNIT_NPM-NEXT: [[A:%.*]] = alloca i32, align 1
-; NOT_TUNIT_NPM-NEXT: call void @g(i32* noalias nocapture nonnull readonly dereferenceable(4) [[A]])
+; NOT_TUNIT_NPM-NEXT: call void @g(i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A]])
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f()
@@ -26,7 +26,7 @@ entry:
 define internal void @g(i32* %a) {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nonnull readonly dereferenceable(4) [[A:%.*]])
+; IS__TUNIT_OPM-SAME: (i32* noalias nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[AA:%.*]] = load i32, i32* [[A]], align 1
 ; IS__TUNIT_OPM-NEXT: call void @z(i32 [[AA]])
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -40,7 +40,7 @@ define internal void @g(i32* %a) {
 ; IS__TUNIT_NPM-NEXT: ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@g
-; IS__CGSCC____-SAME: (i32* nocapture nonnull readonly dereferenceable(4) [[A:%.*]])
+; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull readonly dereferenceable(4) [[A:%.*]])
 ; IS__CGSCC____-NEXT: [[AA:%.*]] = load i32, i32* [[A]], align 1
 ; IS__CGSCC____-NEXT: call void @z(i32 [[AA]])
 ; IS__CGSCC____-NEXT: ret void
@@ -57,7 +57,7 @@ declare void @z(i32)
 define internal i32 @test(i32* %X, i64* %Y) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
+; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4
 ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8
 ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], 1
@@ -89,7 +89,7 @@ define internal i32 @test(i32* %X, i64* %Y) {
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test
-; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
+; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i64* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[Y:%.*]])
 ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4
 ; IS__CGSCC____-NEXT: [[B:%.*]] = load i64, i64* [[Y]], align 8
 ; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], 1
@@ -116,10 +116,10 @@ Return2:
 define internal i32 @caller(i32* %A) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
+; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i64, align 8
 ; IS__TUNIT_OPM-NEXT: store i64 1, i64* [[B]], align 8
-; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]])
+; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[B]])
 ; IS__TUNIT_OPM-NEXT: ret i32 [[C]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
@@ -136,10 +136,10 @@ define internal i32 @caller(i32* %A) {
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]])
 ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i64, align 8
 ; IS__CGSCC____-NEXT: store i64 1, i64* [[B]], align 8
-; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[B]])
+; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i64* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[B]])
 ; IS__CGSCC____-NEXT: ret i32 [[C]]
 ;
 %B = alloca i64
@@ -153,7 +153,7 @@ define i32 @callercaller() {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callercaller()
 ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4
 ; IS__TUNIT_OPM-NEXT: store i32 2, i32* [[B]], align 4
-; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]])
+; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; IS__TUNIT_OPM-NEXT: ret i32 [[X]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone
@@ -168,7 +168,7 @@ define i32 @callercaller() {
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller()
 ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4
 ; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4
-; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]])
+; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
 ; IS__CGSCC____-NEXT: ret i32 [[X]]
 ;
 %B = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
index e7e67c71cf509d..e4a33ef7fc232e 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -11,7 +11,7 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ;
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f
-; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]], i32 [[I:%.*]])
+; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]], i32 [[I:%.*]])
 ; IS__TUNIT_OPM-NEXT: entry:
 ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
@@ -44,7 +44,7 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]])
+; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]])
 ; IS__CGSCC_OPM-NEXT: entry:
 ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
@@ -100,7 +100,7 @@ define i32 @test(i32* %X) {
 ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8
 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4
-; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval align 4 [[X]], i32 zeroext 0)
+; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval align 4 [[X]], i32 zeroext 0)
 ; IS__TUNIT_OPM-NEXT: ret i32 [[C]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -129,7 +129,7 @@ define i32 @test(i32* %X) {
 ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8
 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4
-; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]])
+; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 8 dereferenceable(12) [[S]], i32* noalias nocapture nofree nonnull readnone byval align 4 dereferenceable(4) [[X]])
 ; IS__CGSCC_OPM-NEXT: ret i32 [[C]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
index 1522dfe907f005..ea60eb5a1d4900 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
@@ -8,7 +8,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 define internal i32 @test(i32* %X, i32* %Y) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]])
+; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[Y:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4
 ;
IS__TUNIT_OPM-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] @@ -28,7 +28,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[X]], align 4 ; IS__CGSCC____-NEXT: [[B:%.*]] = load i32, i32* [[Y]], align 4 ; IS__CGSCC____-NEXT: [[C:%.*]] = add i32 [[A]], [[B]] @@ -43,10 +43,10 @@ define internal i32 @test(i32* %X, i32* %Y) { define internal i32 @caller(i32* %B) { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[C]] ; ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn @@ -63,10 +63,10 @@ define internal i32 @caller(i32* %B) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[C]] ; %A = alloca i32 @@ -80,7 +80,7 @@ define i32 @callercaller() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callercaller() ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM: Function Attrs: nofree 
nosync nounwind readnone @@ -95,7 +95,7 @@ define i32 @callercaller() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@callercaller() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll index 3d410cf51bcc17..484d5bcaed3a48 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll @@ -9,7 +9,7 @@ define internal void @f(%struct.ss* byval %b, i32* byval %X) nounwind { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll index f87bd4b802eb81..e04f0b02204b8c 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll @@ -11,7 +11,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @f(%struct.ss* byval %b) nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -36,7 +36,7 @@ define internal i32 @f(%struct.ss* byval %b) nounwind { ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(12) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8 @@ -71,7 +71,7 @@ entry: define internal i32 @g(%struct.ss* byval align 32 %b) nounwind { ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn ; 
IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g -; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 32 dereferenceable(12) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: entry: ; IS__TUNIT_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 @@ -96,7 +96,7 @@ define internal i32 @g(%struct.ss* byval align 32 %b) nounwind { ; ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@g -; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval align 32 dereferenceable(12) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32 @@ -137,8 +137,8 @@ define i32 @main() nounwind { ; IS__TUNIT_OPM-NEXT: store i32 1, i32* [[TMP1]], align 8 ; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__TUNIT_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]]) -; IS__TUNIT_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree nonnull readonly byval align 32 dereferenceable(12) [[S]]) +; IS__TUNIT_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(12) [[S]]) +; IS__TUNIT_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval align 32 dereferenceable(12) [[S]]) ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__TUNIT_OPM-NEXT: ret i32 [[A]] ; @@ -171,8 +171,8 @@ define i32 @main() nounwind { ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[TMP1]], align 32 ; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC_OPM-NEXT: store i64 2, i64* [[TMP4]], align 4 -; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) -; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C0:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 32 dereferenceable(12) [[S]]) +; IS__CGSCC_OPM-NEXT: [[C1:%.*]] = call i32 @g(%struct.ss* noalias nocapture nofree noundef nonnull readnone byval align 32 dereferenceable(12) [[S]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = add i32 [[C0]], [[C1]] ; IS__CGSCC_OPM-NEXT: ret i32 [[A]] ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll index ce997ba494931a..bc22fd6c862fa2 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll @@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @callee(i1 %C, i32* %P) { ; IS__TUNIT_OPM: Function Attrs: 
argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee -; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; IS__TUNIT_OPM-NEXT: br label [[F:%.*]] ; IS__TUNIT_OPM: T: ; IS__TUNIT_OPM-NEXT: unreachable @@ -31,7 +31,7 @@ define internal i32 @callee(i1 %C, i32* %P) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) ; IS__CGSCC____-NEXT: br label [[F:%.*]] ; IS__CGSCC____: T: ; IS__CGSCC____-NEXT: unreachable @@ -54,7 +54,7 @@ define i32 @foo() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@foo() ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 17, i32* [[A]], align 4 -; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) +; IS__TUNIT_OPM-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[X]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -69,7 +69,7 @@ define i32 @foo() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo() ; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 17, i32* [[A]], align 4 -; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC____-NEXT: [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC____-NEXT: ret i32 [[X]] ; %A = alloca i32 ; [#uses=2] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll index b7ff607c270382..5da4437f3ae24f 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -12,7 +12,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 define internal i32 @f(%struct.ss* inalloca %s) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@f -; IS__TUNIT____-SAME: (%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S:%.*]]) +; IS__TUNIT____-SAME: (%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 ; IS__TUNIT____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 @@ -23,7 +23,7 @@ define internal i32 @f(%struct.ss* inalloca %s) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@f -; IS__CGSCC____-SAME: (%struct.ss* inalloca nocapture nofree nonnull align 4 dereferenceable(8) [[S:%.*]]) +; IS__CGSCC____-SAME: (%struct.ss* inalloca nocapture nofree noundef 
nonnull align 4 dereferenceable(8) [[S:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0 ; IS__CGSCC____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 @@ -50,7 +50,7 @@ define i32 @main() { ; IS__TUNIT____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__TUNIT____-NEXT: store i32 1, i32* [[F0]], align 4 ; IS__TUNIT____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__TUNIT____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; IS__TUNIT____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S]]) ; IS__TUNIT____-NEXT: ret i32 [[R]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn @@ -61,7 +61,7 @@ define i32 @main() { ; IS__CGSCC____-NEXT: [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1 ; IS__CGSCC____-NEXT: store i32 1, i32* [[F0]], align 4 ; IS__CGSCC____-NEXT: store i32 2, i32* [[F1]], align 4 -; IS__CGSCC____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree nonnull align 4 dereferenceable(8) [[S]]) +; IS__CGSCC____-NEXT: [[R:%.*]] = call i32 @f(%struct.ss* inalloca noalias nocapture nofree noundef nonnull align 4 dereferenceable(8) [[S]]) ; IS__CGSCC____-NEXT: ret i32 [[R]] ; entry: @@ -78,7 +78,7 @@ entry: define internal i1 @g(%struct.ss* %a, %struct.ss* inalloca %b) nounwind { ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@g -; IS__CGSCC____-SAME: (%struct.ss* nocapture nofree nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* inalloca nocapture nofree nonnull writeonly align 4 dereferenceable(8) [[B:%.*]]) +; IS__CGSCC____-SAME: (%struct.ss* nocapture nofree noundef nonnull readnone align 4 dereferenceable(8) [[A:%.*]], %struct.ss* inalloca nocapture nofree noundef nonnull writeonly align 4 dereferenceable(8) [[B:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i1 undef ; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll index bb14e16820c769..4c2886f83aa992 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll @@ -13,7 +13,7 @@ define internal void @dead() { define internal i32 @test(i32* %X, i32* %Y) { ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_OPM: live: ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -23,7 +23,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) 
[[X:%.*]]) ; IS__CGSCC_NPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_NPM: live: ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -46,14 +46,14 @@ define internal i32 @caller(i32* %B) { ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll index fc1e6589499032..d3bc0c4d317796 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -13,7 +13,7 @@ define internal void @dead() { define internal i32 @test(i32* %X, i32* %Y) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@test -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__TUNIT____-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__TUNIT____: live: ; IS__TUNIT____-NEXT: store i32 0, i32* [[X]], align 4 @@ -23,7 +23,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 [[X:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef writeonly align 4 [[X:%.*]]) ; IS__CGSCC_OPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_OPM: live: ; IS__CGSCC_OPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -33,7 +33,7 @@ define internal i32 @test(i32* %X, i32* %Y) { ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[X:%.*]]) ; IS__CGSCC_NPM-NEXT: br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]] ; IS__CGSCC_NPM: live: ; IS__CGSCC_NPM-NEXT: store i32 0, i32* [[X]], align 4 @@ -54,26 +54,26 @@ dead: define internal i32 @caller(i32* %B) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree nonnull writeonly align 4 
dereferenceable(4) [[B:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT____-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 1, i32* [[A]], align 4 -; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: ret i32 0 ; ; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_OPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_OPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC_NPM-NEXT: [[A:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 1, i32* [[A]], align 4 -; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call i32 @test(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_NPM-NEXT: ret i32 undef ; %A = alloca i32 @@ -87,14 +87,14 @@ define i32 @callercaller() { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@callercaller() ; NOT_CGSCC_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; NOT_CGSCC_NPM-NEXT: store i32 2, i32* [[B]], align 4 -; NOT_CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; NOT_CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; NOT_CGSCC_NPM-NEXT: ret i32 0 ; ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callercaller() ; IS__CGSCC_NPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 2, i32* [[B]], align 4 -; IS__CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC_NPM-NEXT: [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC_NPM-NEXT: ret i32 0 ; %B = alloca i32 diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll index 0d3464c062fa20..dcd4feba716a00 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/naked_functions.ll @@ -11,7 +11,7 @@ 
 define i32 @bar() {
 ; CHECK-LABEL: define {{[^@]+}}@bar()
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* nonnull align 4 dereferenceable(4) @g)
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32* noundef nonnull align 4 dereferenceable(4) @g)
 ; CHECK-NEXT: ret i32 [[CALL]]
 ;
 entry:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index 1c847b88b52860..94be92dc73695f 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -11,7 +11,7 @@ define void @caller() #0 {
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@caller()
 ; NOT_TUNIT_NPM-NEXT: [[X:%.*]] = alloca i32, align 4
 ; NOT_TUNIT_NPM-NEXT: store i32 42, i32* [[X]], align 4
-; NOT_TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0
+; NOT_TUNIT_NPM-NEXT: call void @promote_i32_ptr(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller()
@@ -29,7 +29,7 @@ define void @caller() #0 {

 define internal void @promote_i32_ptr(i32* %xp) {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@promote_i32_ptr
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]])
+; IS__TUNIT_OPM-SAME: (i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4
 ; IS__TUNIT_OPM-NEXT: call void @use_i32(i32 [[X]])
 ; IS__TUNIT_OPM-NEXT: ret void
@@ -43,7 +43,7 @@ define internal void @promote_i32_ptr(i32* %xp) {
 ; IS__TUNIT_NPM-NEXT: ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@promote_i32_ptr
-; IS__CGSCC____-SAME: (i32* nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]])
+; IS__CGSCC____-SAME: (i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[XP:%.*]])
 ; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[XP]], align 4
 ; IS__CGSCC____-NEXT: call void @use_i32(i32 [[X]])
 ; IS__CGSCC____-NEXT: ret void
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
index 38a159608827b5..2f7e41f080cd6f 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
@@ -19,7 +19,7 @@
 define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@fn
-; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P1:%.*]])
+; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P1:%.*]])
 ; IS__TUNIT____-NEXT: entry:
 ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* @g, align 4, [[TBAA0:!tbaa !.*]]
 ; IS__TUNIT____-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
@@ -51,7 +51,7 @@ define i32 @main() {
 ; IS__TUNIT____-NEXT: store i32* @g, i32** [[TMP0]], align 8, [[TBAA5]]
 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32*, i32** @a, align 8, [[TBAA5]]
 ; IS__TUNIT____-NEXT: store i32 1, i32* [[TMP1]], align 4, [[TBAA0]]
-; IS__TUNIT____-NEXT: call fastcc void @fn(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @g)
+; IS__TUNIT____-NEXT: call fastcc void @fn(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @g)
 ; IS__TUNIT____-NEXT: ret i32 0
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
index a16d6fc49ee3dc..834df9a1c85447 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
@@ -11,7 +11,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) {
 ;
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@add
-; IS__TUNIT_OPM-SAME: ({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
+; IS__TUNIT_OPM-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
 ; IS__TUNIT_OPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
 ; IS__TUNIT_OPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
@@ -22,7 +22,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) {
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@add
-; IS__TUNIT_NPM-SAME: ({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
+; IS__TUNIT_NPM-SAME: ({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
 ; IS__TUNIT_NPM-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
 ; IS__TUNIT_NPM-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
 ; IS__TUNIT_NPM-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
@@ -33,7 +33,7 @@ define internal void @add({i32, i32}* %this, i32* sret %r) {
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@add
-; IS__CGSCC____-SAME: ({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
+; IS__CGSCC____-SAME: ({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[THIS:%.*]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R:%.*]])
 ; IS__CGSCC____-NEXT: [[AP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 0
 ; IS__CGSCC____-NEXT: [[BP:%.*]] = getelementptr { i32, i32 }, { i32, i32 }* [[THIS]], i32 0, i32 1
 ; IS__CGSCC____-NEXT: [[A:%.*]] = load i32, i32* [[AP]], align 8
@@ -56,28 +56,28 @@ define void @f() {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f()
 ; IS__TUNIT_OPM-NEXT: [[R:%.*]] = alloca i32, align 4
 ; IS__TUNIT_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8
-; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; IS__TUNIT_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]])
 ; IS__TUNIT_OPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f()
 ; IS__TUNIT_NPM-NEXT: [[R:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8
-; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; IS__TUNIT_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]])
 ; IS__TUNIT_NPM-NEXT: ret void
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f()
 ; IS__CGSCC_OPM-NEXT: [[R:%.*]] = alloca i32, align 4
 ; IS__CGSCC_OPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8
-; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; IS__CGSCC_OPM-NEXT: call void @add({ i32, i32 }* nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]])
 ; IS__CGSCC_OPM-NEXT: ret void
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f()
 ; IS__CGSCC_NPM-NEXT: [[R:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT: [[PAIR:%.*]] = alloca { i32, i32 }, align 8
-; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree nonnull sret writeonly align 4 dereferenceable(4) [[R]])
+; IS__CGSCC_NPM-NEXT: call void @add({ i32, i32 }* noalias nocapture nofree noundef nonnull readonly align 8 dereferenceable(8) [[PAIR]], i32* noalias nocapture nofree noundef nonnull sret writeonly align 4 dereferenceable(4) [[R]])
 ; IS__CGSCC_NPM-NEXT: ret void
 ;
 %r = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
index ce4f78065d1161..685e21df3d27c0 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
@@ -34,7 +34,7 @@ define internal void @bar(%pair* byval %Data) {
 ; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4
 ; IS__CGSCC_NPM-NEXT: [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT: store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]], align 4
-; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* nonnull align 8 dereferenceable(8) [[DATA_PRIV]])
+; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = call i8* @foo(%pair* noundef nonnull align 8 dereferenceable(8) [[DATA_PRIV]])
 ; IS__CGSCC_NPM-NEXT: ret void
 ;
 tail call i8* @foo(%pair* %Data)
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll
index 6120b725cc74fc..b072069f8945ef 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll
@@ -21,7 +21,7 @@ define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@main
 ; CHECK-SAME: (i32 [[ARGC:%.*]], i8** nocapture nofree readnone [[ARGV:%.*]])
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* nonnull byval align 8 dereferenceable(16) @t45)
+; CHECK-NEXT: tail call void (i8*, i8*, i8*, i8*, i8*, ...) @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* noundef nonnull byval align 8 dereferenceable(16) @t45)
 ; CHECK-NEXT: ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index 98051fc678ad3c..a6e27f7254dd47 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -11,7 +11,7 @@
 define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu1
-; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]])
+; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]])
 ; IS__CGSCC_OPM-NEXT: entry:
 ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1
 ; IS__CGSCC_OPM-NEXT: store i32 99, i32* [[TMP0]], align 4
@@ -52,7 +52,7 @@ return: ; preds = %entry
 define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2
-; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]])
+; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]])
 ; IS__TUNIT_OPM-NEXT: entry:
 ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
@@ -104,7 +104,7 @@ define i32 @unions() nounwind {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions()
 ; IS__TUNIT_OPM-NEXT: entry:
-; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
+; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) @mystr)
 ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
@@ -132,7 +132,7 @@ entry:
 define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]])
+; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(8) [[U:%.*]])
 ; IS__TUNIT_OPM-NEXT: entry:
 ; IS__TUNIT_OPM-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT: store i32 99, i32* [[Z]], align 4
@@ -165,7 +165,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]])
+; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval align 8 dereferenceable(8) [[U:%.*]])
 ; IS__CGSCC_OPM-NEXT: entry:
 ; IS__CGSCC_OPM-NEXT: [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1
 ; IS__CGSCC_OPM-NEXT: store i32 99, i32* [[Z]], align 4
@@ -212,7 +212,7 @@ define i32 @unions_v2() nounwind {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions_v2()
 ; IS__TUNIT_OPM-NEXT: entry:
-; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
+; IS__TUNIT_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval align 8 dereferenceable(8) @mystr)
 ; IS__TUNIT_OPM-NEXT: ret i32 [[RESULT]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -228,7 +228,7 @@ define i32 @unions_v2() nounwind {
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@unions_v2()
 ; IS__CGSCC_OPM-NEXT: entry:
-; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree nonnull readnone byval align 8 dereferenceable(8) @mystr)
+; IS__CGSCC_OPM-NEXT: [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree noundef nonnull readnone byval align 8 dereferenceable(8) @mystr)
 ; IS__CGSCC_OPM-NEXT: ret i32 [[RESULT]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
index e6b8e8317a42ae..da44880a7c6fea 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
@@ -128,10 +128,10 @@ entry:
 define void @foo() {
 ; CHECK-LABEL: define {{[^@]+}}@foo()
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb0, i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb0, i32 0, i32 1)
-; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb1, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb2, i32 0, i32 1)
-; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb3, i32 (i32)* nonnull @cb2, i32 (i32)* nonnull @cb3, i32 0, i32 1)
-; CHECK-NEXT: call void @broker(i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 (i32)* nonnull @cb4, i32 0, i32 1)
+; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb0, i32 (i32)* noundef nonnull @cb1, i32 (i32)* noundef nonnull @cb0, i32 0, i32 1)
+; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb1, i32 (i32)* noundef nonnull @cb2, i32 (i32)* noundef nonnull @cb2, i32 0, i32 1)
+; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb3, i32 (i32)* noundef nonnull @cb2, i32 (i32)* noundef nonnull @cb3, i32 0, i32 1)
+; CHECK-NEXT: call void @broker(i32 (i32)* noundef nonnull @cb4, i32 (i32)* noundef nonnull @cb4, i32 (i32)* noundef nonnull @cb4, i32 0, i32 1)
 ; CHECK-NEXT: ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 137193b972ca6c..e33db1ca6f4a06 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -36,7 +36,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__TUNIT_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__TUNIT_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4
 ; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4
-; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
+; IS__TUNIT_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
 ; IS__TUNIT_OPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@foo
@@ -47,7 +47,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__TUNIT_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__TUNIT_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4
 ; IS__TUNIT_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4
-; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
+; IS__TUNIT_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 undef)
 ; IS__TUNIT_NPM-NEXT: ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@foo
@@ -58,7 +58,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__CGSCC_OPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__CGSCC_OPM-NEXT: store float 3.000000e+00, float* [[P]], align 4
 ; IS__CGSCC_OPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
+; IS__CGSCC_OPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
 ; IS__CGSCC_OPM-NEXT: ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@foo
@@ -69,7 +69,7 @@ define dso_local void @foo(i32 %N) {
 ; IS__CGSCC_NPM-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
 ; IS__CGSCC_NPM-NEXT: store float 3.000000e+00, float* [[P]], align 4
 ; IS__CGSCC_NPM-NEXT: store i32 7, i32* [[N_ADDR]], align 4
-; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
+; IS__CGSCC_NPM-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB1:@.*]], i32 3, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, float*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N_ADDR]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P]], i64 4617315517961601024)
 ; IS__CGSCC_NPM-NEXT: ret void
 ;
 entry:
@@ -84,7 +84,7 @@ entry:

 define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
 ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
+; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
 ; NOT_TUNIT_NPM-NEXT: entry:
 ; NOT_TUNIT_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8
 ; NOT_TUNIT_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
@@ -103,7 +103,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; NOT_TUNIT_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 ; NOT_TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 ; NOT_TUNIT_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
+; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
 ; NOT_TUNIT_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 ; NOT_TUNIT_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
 ; NOT_TUNIT_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -139,13 +139,13 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; NOT_TUNIT_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 ; NOT_TUNIT_NPM: omp.loop.exit:
 ; NOT_TUNIT_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
+; NOT_TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
 ; NOT_TUNIT_NPM-NEXT: br label [[OMP_PRECOND_END]]
 ; NOT_TUNIT_NPM: omp.precond.end:
 ; NOT_TUNIT_NPM-NEXT: ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
+; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
 ; IS__TUNIT_NPM-NEXT: entry:
 ; IS__TUNIT_NPM-NEXT: [[Q_ADDR:%.*]] = alloca i64, align 8
 ; IS__TUNIT_NPM-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
@@ -164,7 +164,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; IS__TUNIT_NPM-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 ; IS__TUNIT_NPM-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 ; IS__TUNIT_NPM-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
+; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0:@.*]], i32 [[TMP5]], i32 34, i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
 ; IS__TUNIT_NPM-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
 ; IS__TUNIT_NPM-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
 ; IS__TUNIT_NPM-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -200,7 +200,7 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; IS__TUNIT_NPM-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
 ; IS__TUNIT_NPM: omp.loop.exit:
 ; IS__TUNIT_NPM-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
+; IS__TUNIT_NPM-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) [[GLOB0]], i32 [[TMP12]])
 ; IS__TUNIT_NPM-NEXT: br label [[OMP_PRECOND_END]]
 ; IS__TUNIT_NPM: omp.precond.end:
 ; IS__TUNIT_NPM-NEXT: ret void
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 7ac5b42d741723..bf3ee0ff8eec5b 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -36,10 +36,10 @@ define dso_local i32 @main() {
 ; IS__TUNIT____-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8
 ; IS__TUNIT____-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8
 ; IS__TUNIT____-NEXT: [[THREAD:%.*]] = alloca i64, align 8
-; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 undef)
-; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef)
-; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
-; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
+; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree noundef readnone align 536870912 undef)
+; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef)
+; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
+; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
 ; IS__TUNIT____-NEXT: ret i32 0
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@main()
@@ -47,10 +47,10 @@ define dso_local i32 @main() {
 ; IS__CGSCC____-NEXT: [[ALLOC1:%.*]] = alloca i8, align 8
 ; IS__CGSCC____-NEXT: [[ALLOC2:%.*]] = alloca i8, align 8
 ; IS__CGSCC____-NEXT: [[THREAD:%.*]] = alloca i64, align 8
-; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 null)
-; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
-; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @baz, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
-; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture align 536870912 null, i8* (i8*)* nonnull @buz, i8* noalias nofree nonnull readnone align 8 dereferenceable(1) [[ALLOC2]])
+; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree noundef readnone align 536870912 null)
+; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
+; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8)
[[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC2]]) ; IS__CGSCC____-NEXT: ret i32 0 ; entry: @@ -69,13 +69,13 @@ declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*, define internal i8* @foo(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@foo -; IS__TUNIT____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* null ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo -; IS__CGSCC____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* null ; @@ -86,13 +86,13 @@ entry: define internal i8* @bar(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@bar -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@bar -; IS__CGSCC____-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* bitcast (i8** @GlobalVPtr to i8*) ; @@ -103,13 +103,13 @@ entry: define internal i8* @baz(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@baz -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* [[ARG]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@baz -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* [[ARG]] ; 
@@ -120,13 +120,13 @@ entry: define internal i8* @buz(i8* %arg) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@buz -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: ret i8* [[ARG]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@buz -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[ARG:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: ret i8* [[ARG]] ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll index c6af31713bfac9..0d472837ddb845 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -8,7 +8,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@incdec -; IS__TUNIT____-SAME: (i1 [[C:%.*]], i32* noalias nofree nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) +; IS__TUNIT____-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) ; IS__TUNIT____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: T: @@ -22,7 +22,7 @@ define internal i32* @incdec(i1 %C, i32* %V) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@incdec -; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* nofree nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) +; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) ; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: T: @@ -78,7 +78,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) [[ATTR2:#.*]] personality i32 (...)* @__gxx_personality_v0 ; IS__TUNIT____-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__TUNIT____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) +; IS__TUNIT____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) ; IS__TUNIT____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 1, i32 2) ; IS__TUNIT____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 ; IS__TUNIT____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 3, i32 4) @@ -97,7 +97,7 @@ define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { ; IS__CGSCC____-LABEL: define 
{{[^@]+}}@caller ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) [[ATTR1:#.*]] personality i32 (...)* @__gxx_personality_v0 ; IS__CGSCC____-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__CGSCC____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree nonnull align 4 dereferenceable(4) [[Q]]) +; IS__CGSCC____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[Q]]) ; IS__CGSCC____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 1, i32 2) ; IS__CGSCC____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 ; IS__CGSCC____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 3, i32 4) diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll index 904860e4921c7b..4405b7bc1b0955 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/thread_local_acs.ll @@ -26,7 +26,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@callee -; IS__TUNIT____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) +; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP:%.*]] = load i32, i32* [[THREAD_LOCAL_PTR]], align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @gsh, align 4 @@ -35,7 +35,7 @@ define internal i32 @callee(i32* %thread_local_ptr, i32* %shared_ptr) { ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[THREAD_LOCAL_PTR:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[SHARED_PTR:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[TMP:%.*]] = load i32, i32* [[THREAD_LOCAL_PTR]], align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @gsh, align 4 @@ -52,12 +52,12 @@ entry: define dso_local void @caller() { ; IS__TUNIT____-LABEL: define {{[^@]+}}@caller() ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) undef) +; IS__TUNIT____-NEXT: call void @broker(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* noundef nonnull @callee, i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) undef) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller() ; IS__CGSCC____-NEXT: entry: -; IS__CGSCC____-NEXT: call void @broker(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* nonnull @callee, i32* 
nocapture nofree nonnull readonly align 4 dereferenceable(4) @gsh) +; IS__CGSCC____-NEXT: call void @broker(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @gtl, i32 (i32*, i32*)* noundef nonnull @callee, i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @gsh) ; IS__CGSCC____-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll index b31327b6a85231..7df160e817b5f9 100644 --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -149,7 +149,7 @@ define i32* @test6_2() #0 { define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@f1 -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__TUNIT____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__TUNIT____: 3: @@ -161,7 +161,7 @@ define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 { ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@f1 -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__CGSCC____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__CGSCC____: 3: @@ -191,7 +191,7 @@ define internal i8* @f2(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a1, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = tail call i8* @f3() @@ -224,7 +224,7 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -247,12 +247,12 @@ define internal i8* @f3(i8* readnone %0) local_unnamed_addr #0 { define align 4 i8* @test7() #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7() -; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree nonnull readnone align 8 
dereferenceable(1) "no-capture-maybe-returned" @a1) +; IS__TUNIT____-NEXT: [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; IS__TUNIT____-NEXT: ret i8* [[C]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7() -; IS__CGSCC____-NEXT: [[C:%.*]] = tail call nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__CGSCC____-NEXT: [[C:%.*]] = tail call nonnull align 8 dereferenceable(1) i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) ; IS__CGSCC____-NEXT: ret i8* [[C]] ; %c = tail call i8* @f1(i8* align 8 dereferenceable(1) @a1) @@ -264,7 +264,7 @@ define align 4 i8* @test7() #0 { define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__TUNIT____-LABEL: define {{[^@]+}}@f1b -; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__TUNIT____-SAME: (i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__TUNIT____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__TUNIT____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__TUNIT____: 3: @@ -278,7 +278,7 @@ define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 { ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@f1b -; IS__CGSCC____-SAME: (i8* nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr +; IS__CGSCC____-SAME: (i8* nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP0]], null ; IS__CGSCC____-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP5:%.*]] ; IS__CGSCC____: 3: @@ -312,7 +312,7 @@ define internal i8* @f2b(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a1, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP4:%.*]], label [[TMP2:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; CHECK-NEXT: br label [[TMP6:%.*]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = tail call i8* @f3b() @@ -346,7 +346,7 @@ define internal i8* @f3b(i8* readnone %0) local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8* @a2, null ; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] ; CHECK: 2: -; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) +; CHECK-NEXT: [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 16 dereferenceable(1) "no-capture-maybe-returned" @a2) ; CHECK-NEXT: br label [[TMP4]] ; CHECK: 4: ; CHECK-NEXT: [[TMP5:%.*]] = phi i8* [ [[TMP3]], [[TMP2]] ], [ @a1, [[TMP0:%.*]] ] @@ -368,13 +368,13 @@ define align 4 i32* @test7b(i32* align 32 %p) #0 { ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind uwtable 
; IS__TUNIT____-LABEL: define {{[^@]+}}@test7b ; IS__TUNIT____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) ; IS__TUNIT____-NEXT: ret i32* [[P]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind uwtable ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7b ; IS__CGSCC____-SAME: (i32* nofree readnone returned align 32 "no-capture-maybe-returned" [[P:%.*]]) -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree nonnull readnone align 8 dereferenceable(1) @a1) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call i8* @f1b(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) @a1) ; IS__CGSCC____-NEXT: ret i32* [[P]] ; tail call i8* @f1b(i8* align 8 dereferenceable(1) @a1) @@ -384,12 +384,12 @@ define align 4 i32* @test7b(i32* align 32 %p) #0 { ; TEST 8 define void @test8_helper() { ; CHECK-LABEL: define {{[^@]+}}@test8_helper() -; CHECK-NEXT: [[PTR0:%.*]] = tail call i32* @unknown() -; CHECK-NEXT: [[PTR1:%.*]] = tail call align 4 i32* @unknown() -; CHECK-NEXT: [[PTR2:%.*]] = tail call align 8 i32* @unknown() -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone [[PTR0]]) -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) -; CHECK-NEXT: tail call void @test8(i32* noalias nocapture readnone align 8 [[PTR2]], i32* noalias nocapture readnone align 4 [[PTR1]], i32* noalias nocapture readnone align 4 [[PTR1]]) +; CHECK-NEXT: [[PTR0:%.*]] = tail call noundef i32* @unknown() +; CHECK-NEXT: [[PTR1:%.*]] = tail call noundef align 4 i32* @unknown() +; CHECK-NEXT: [[PTR2:%.*]] = tail call noundef align 8 i32* @unknown() +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone [[PTR0]]) +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture noundef readnone align 8 [[PTR2]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]]) +; CHECK-NEXT: tail call void @test8(i32* noalias nocapture noundef readnone align 8 [[PTR2]], i32* noalias nocapture noundef readnone align 4 [[PTR1]], i32* noalias nocapture noundef readnone align 4 [[PTR1]]) ; CHECK-NEXT: ret void ; %ptr0 = tail call i32* @unknown() @@ -406,10 +406,10 @@ declare void @user_i32_ptr(i32* nocapture readnone) nounwind define internal void @test8(i32* %a, i32* %b, i32* %c) { ; IS__TUNIT____: Function Attrs: nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@test8 -; IS__TUNIT____-SAME: (i32* noalias nocapture readnone align 4 [[A:%.*]], i32* noalias nocapture readnone align 4 [[B:%.*]], i32* noalias nocapture readnone [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone align 4 [[A]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone align 4 [[B]]) -; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture readnone [[C]]) +; 
IS__TUNIT____-SAME: (i32* noalias nocapture noundef readnone align 4 [[A:%.*]], i32* noalias nocapture noundef readnone align 4 [[B:%.*]], i32* noalias nocapture noundef readnone [[C:%.*]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone align 4 [[A]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone align 4 [[B]]) +; IS__TUNIT____-NEXT: call void @user_i32_ptr(i32* noalias nocapture noundef readnone [[C]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nounwind diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll index f1dfacea8a96ef..7abb5fd241ce73 100644 --- a/llvm/test/Transforms/Attributor/callbacks.ll +++ b/llvm/test/Transforms/Attributor/callbacks.ll @@ -25,7 +25,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -37,7 +37,7 @@ define void @t0_caller(i32* %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller @@ -49,7 +49,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller @@ -61,7 +61,7 @@ define void @t0_caller(i32* %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -80,7 +80,7 @@ entry: define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t0_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -89,7 +89,7 @@ define internal void @t0_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t0_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture 
nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -124,7 +124,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -136,7 +136,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller @@ -148,7 +148,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller @@ -160,7 +160,7 @@ define void @t1_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -180,7 +180,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; NOT_TUNIT_NPM: Function Attrs: nosync ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -190,7 +190,7 @@ define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; ; IS__TUNIT_NPM: Function Attrs: nosync ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t1_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 
[[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -224,7 +224,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -236,7 +236,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller @@ -248,7 +248,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller @@ -260,7 +260,7 @@ define void @t2_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -281,7 +281,7 @@ entry: define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t2_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -290,7 +290,7 @@ define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t2_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], 
i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -324,8 +324,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 undef, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_OPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -337,8 +337,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__TUNIT_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__TUNIT_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__TUNIT_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 undef, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__TUNIT_NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller @@ -350,8 +350,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_OPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_OPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_OPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* nocapture align 256 [[A]], i64 99, i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller @@ -363,8 +363,8 @@ define void @t3_caller(i32* noalias %a) { ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = bitcast i32* [[B]] to i8* ; IS__CGSCC_NPM-NEXT: store i32 42, i32* [[B]], align 32 ; IS__CGSCC_NPM-NEXT: store i32* [[B]], i32** [[C]], align 64 -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) -; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* noalias nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) +; IS__CGSCC_NPM-NEXT: call void (i32*, i32*, void (i32*, i32*, ...)*, ...) 
@t3_callback_broker(i32* noalias nocapture noundef align 536870912 null, i32* noalias nocapture noundef nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture noundef bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C]]) ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -386,7 +386,7 @@ entry: define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) { ; ; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@t3_callback_callee -; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; NOT_TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; NOT_TUNIT_NPM-NEXT: entry: ; NOT_TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; NOT_TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 @@ -395,7 +395,7 @@ define internal void @t3_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, ; NOT_TUNIT_NPM-NEXT: ret void ; ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@t3_callback_callee -; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) +; IS__TUNIT_NPM-SAME: (i32* nocapture nonnull writeonly align 4 dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture noundef nonnull readonly align 64 dereferenceable(8) [[C:%.*]]) ; IS__TUNIT_NPM-NEXT: entry: ; IS__TUNIT_NPM-NEXT: [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8 ; IS__TUNIT_NPM-NEXT: store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]], align 4 diff --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll index 059587789035e6..f7de3287b88359 100644 --- a/llvm/test/Transforms/Attributor/depgraph.ll +++ b/llvm/test/Transforms/Attributor/depgraph.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -passes=attributor-cgscc -disable-output -attributor-print-dep < %s 2>&1 | FileCheck %s --check-prefixes=GRAPH ; RUN: opt -passes=attributor-cgscc -disable-output -attributor-dump-dep-graph -attributor-depgraph-dot-filename-prefix=%t < %s 2>/dev/null ; RUN: FileCheck %s -input-file=%t_0.dot --check-prefix=DOT diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 3f8fb81a2636b6..816d3df44560d9 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -275,21 +275,37 @@ if.false: } define void @f7_2(i1 %c) { -; CHECK: Function Attrs: nounwind willreturn -; CHECK-LABEL: define {{[^@]+}}@f7_2 -; CHECK-SAME: (i1 [[C:%.*]]) -; 
CHECK-NEXT: [[PTR:%.*]] = tail call nonnull align 4 dereferenceable(4) i32* @unkown_ptr() -; CHECK-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]]) -; CHECK-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4 -; CHECK-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(4) [[PTR]]) -; CHECK-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if.true: -; CHECK-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* nonnull align 4 dereferenceable(8) [[PTR]]) -; CHECK-NEXT: ret void -; CHECK: if.false: -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM: Function Attrs: nounwind willreturn +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@f7_2 +; NOT_CGSCC_NPM-SAME: (i1 [[C:%.*]]) +; NOT_CGSCC_NPM-NEXT: [[PTR:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) i32* @unkown_ptr() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4 +; NOT_CGSCC_NPM-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; NOT_CGSCC_NPM: if.true: +; NOT_CGSCC_NPM-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; NOT_CGSCC_NPM-NEXT: ret void +; NOT_CGSCC_NPM: if.false: +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM: Function Attrs: nounwind willreturn +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f7_2 +; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) +; IS__CGSCC_NPM-NEXT: [[PTR:%.*]] = tail call nonnull align 4 dereferenceable(4) i32* @unkown_ptr() +; IS__CGSCC_NPM-NEXT: [[A:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[ARG_A_0:%.*]] = load i32, i32* [[PTR]], align 4 +; IS__CGSCC_NPM-NEXT: [[B:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; IS__CGSCC_NPM: if.true: +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[D:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: [[E:%.*]] = tail call i32 @unkown_f(i32* noundef nonnull align 4 dereferenceable(8) [[PTR]]) +; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC_NPM: if.false: +; IS__CGSCC_NPM-NEXT: ret void ; %ptr = tail call i32* @unkown_ptr() %A = tail call i32 @unkown_f(i32* %ptr) @@ -1017,23 +1033,41 @@ define void @nonnull_assume_call(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ATTRIBUTOR-NEXT: call void @unknown() ; ATTRIBUTOR-NEXT: ret void ; -; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_call -; CHECK-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() -; CHECK-NEXT: call void 
@unknown_use32(i32* nonnull dereferenceable(101) [[P]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) -; CHECK-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) -; CHECK-NEXT: call void @unknown_use32(i32* nonnull dereferenceable(101) [[P]]) -; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_call +; NOT_CGSCC_OPM-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown() +; NOT_CGSCC_OPM-NEXT: [[P:%.*]] = call noundef nonnull dereferenceable(101) i32* @unkown_ptr() +; NOT_CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; NOT_CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; NOT_CGSCC_OPM-NEXT: call void @unknown() +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_call +; IS__CGSCC_OPM-SAME: (i8* [[ARG1:%.*]], i8* [[ARG2:%.*]], i8* [[ARG3:%.*]], i8* [[ARG4:%.*]]) +; IS__CGSCC_OPM-NEXT: call void @unknown() +; IS__CGSCC_OPM-NEXT: [[P:%.*]] = call nonnull dereferenceable(101) i32* @unkown_ptr() +; IS__CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i32* [[P]], i64 
101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(2) [[ARG1]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(31) [[ARG2]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull [[ARG3]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use8(i8* nonnull dereferenceable(42) [[ARG4]]) +; IS__CGSCC_OPM-NEXT: call void @unknown_use32(i32* noundef nonnull dereferenceable(101) [[P]]) +; IS__CGSCC_OPM-NEXT: call void @unknown() +; IS__CGSCC_OPM-NEXT: ret void ; call void @unknown() %p = call i32* @unkown_ptr() diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 28c0166dd0cd62..3451fa8a59f44f 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -43,12 +43,19 @@ define void @nofree_arg_only(i8* %p1, i8* %p2) { ; TEST 1 - negative, pointer freed in another function. define void @test1() { -; CHECK-LABEL: define {{[^@]+}}@test1() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: tail call void (...) @func_throws() -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test1() +; NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_NPM-NEXT: tail call void (...) @func_throws() +; NOT_CGSCC_NPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test1() +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_NPM-NEXT: tail call void @nocapture_func_frees_pointer(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_NPM-NEXT: tail call void (...) @func_throws() +; IS__CGSCC_NPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_NPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nocapture_func_frees_pointer(i8* %1) @@ -60,11 +67,17 @@ define void @test1() { ; TEST 2 - negative, call to a sync function. 
define void @test2() { -; CHECK-LABEL: define {{[^@]+}}@test2() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @sync_func(i8* [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test2() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @sync_func(i8* noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test2() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @sync_func(i8* noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @sync_func(i8* %1) @@ -75,16 +88,22 @@ define void @test2() { ; TEST 3 - 1 malloc, 1 free define void @test3() { -; IS________OPM-LABEL: define {{[^@]+}}@test3() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -93,18 +112,25 @@ define void @test3() { } define void @test3a(i8* %p) { -; IS________OPM-LABEL: define {{[^@]+}}@test3a -; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test3a +; IS__TUNIT_OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3a ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call 
void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3a +; IS__CGSCC_OPM-SAME: (i8* nocapture [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nofree_arg_only(i8* %1, i8* %p) @@ -117,15 +143,15 @@ declare noalias i8* @aligned_alloc(i64, i64) define void @test3b(i8* %p) { ; IS________OPM-LABEL: define {{[^@]+}}@test3b ; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 32, i64 128) -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @aligned_alloc(i64 32, i64 128) +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test3b ; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 128, align 32 -; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP1]], i8* nocapture [[P]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 32, i64 128) @@ -136,11 +162,17 @@ define void @test3b(i8* %p) { ; leave alone non-constant alignments. 
define void @test3c(i64 %alignment) { -; CHECK-LABEL: define {{[^@]+}}@test3c -; CHECK-SAME: (i64 [[ALIGNMENT:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test3c +; NOT_CGSCC_OPM-SAME: (i64 [[ALIGNMENT:%.*]]) +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test3c +; IS__CGSCC_OPM-SAME: (i64 [[ALIGNMENT:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 [[ALIGNMENT]], i64 128) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @aligned_alloc(i64 %alignment, i64 128) tail call void @free(i8* %1) @@ -151,16 +183,16 @@ declare noalias i8* @calloc(i64, i64) define void @test0() { ; IS________OPM-LABEL: define {{[^@]+}}@test0() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 2, i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 2, i64 4) +; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) ; IS________OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test0() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 8, align 1 ; IS________NPM-NEXT: [[CALLOC_BC:%.*]] = bitcast i8* [[TMP1]] to i8* ; IS________NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[CALLOC_BC]], i8 0, i64 8, i1 false) -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void ; %1 = tail call noalias i8* @calloc(i64 2, i64 4) @@ -171,15 +203,20 @@ define void @test0() { ; TEST 4 define void @test4() { -; IS________OPM-LABEL: define {{[^@]+}}@test4() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test4() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @nofree_func(i8* %1) @@ -190,20 +227,20 @@ 
define void @test4() { ; are in nofree functions and are not captured define void @test5(i32, i8* %p) { -; IS________OPM-LABEL: define {{[^@]+}}@test5 -; IS________OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; IS________OPM: 4: -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP2]], i8* nocapture [[P]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6]] -; IS________OPM: 6: -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test5 +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__TUNIT_OPM: 4: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6]] +; IS__TUNIT_OPM: 6: +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test5 ; IS________NPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) @@ -211,13 +248,28 @@ define void @test5(i32, i8* %p) { ; IS________NPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS________NPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS________NPM: 4: -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) ; IS________NPM-NEXT: br label [[TMP6:%.*]] ; IS________NPM: 5: -; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP2]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) ; IS________NPM-NEXT: br label [[TMP6]] ; IS________NPM: 6: ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test5 +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]], i8* nocapture [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__CGSCC_OPM: 4: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree noundef [[TMP2]], i8* nocapture [[P]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6]] +; IS__CGSCC_OPM: 6: +; IS__CGSCC_OPM-NEXT: ret void ; %2 = tail call noalias i8* @malloc(i64 4) %3 = icmp eq i32 %0, 0 @@ -239,20 +291,20 @@ define void @test5(i32, i8* %p) { ; TEST 6 - 
all exit paths have a call to free define void @test6(i32) { -; IS________OPM-LABEL: define {{[^@]+}}@test6 -; IS________OPM-SAME: (i32 [[TMP0:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; IS________OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] -; IS________OPM: 4: -; IS________OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP2]]) -; IS________OPM-NEXT: br label [[TMP6]] -; IS________OPM: 6: -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test6 +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__TUNIT_OPM: 4: +; IS__TUNIT_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__TUNIT_OPM-NEXT: br label [[TMP6]] +; IS__TUNIT_OPM: 6: +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test6 ; IS________NPM-SAME: (i32 [[TMP0:%.*]]) @@ -260,12 +312,27 @@ define void @test6(i32) { ; IS________NPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 ; IS________NPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] ; IS________NPM: 4: -; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree [[TMP2]]) +; IS________NPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) ; IS________NPM-NEXT: br label [[TMP6:%.*]] ; IS________NPM: 5: ; IS________NPM-NEXT: br label [[TMP6]] ; IS________NPM: 6: ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test6 +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP3]], label [[TMP5:%.*]], label [[TMP4:%.*]] +; IS__CGSCC_OPM: 4: +; IS__CGSCC_OPM-NEXT: tail call void @nofree_func(i8* noalias nocapture nofree noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP2]]) +; IS__CGSCC_OPM-NEXT: br label [[TMP6]] +; IS__CGSCC_OPM: 6: +; IS__CGSCC_OPM-NEXT: ret void ; %2 = tail call noalias i8* @malloc(i64 4) %3 = icmp eq i32 %0, 0 @@ -308,14 +375,23 @@ define void @test7() { ; TEST 8 - Negative: bitcast pointer used in capture function define void @test8() { -; CHECK-LABEL: define {{[^@]+}}@test8() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo(i32* align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nocapture nonnull 
align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test8() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @foo(i32* noundef align 4 [[TMP2]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test8() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @foo(i32* noundef align 4 [[TMP2]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -329,14 +405,23 @@ define void @test8() { ; TEST 9 - FIXME: malloc should be converted. define void @test9() { -; CHECK-LABEL: define {{[^@]+}}@test9() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo_nounw(i32* nofree align 4 [[TMP2]]) -; CHECK-NEXT: tail call void @free(i8* nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test9() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test9() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -351,22 +436,31 @@ define void @test9() { ; TEST 10 - 1 malloc, 1 free define i32 @test10() { -; IS________OPM-LABEL: define {{[^@]+}}@test10() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; IS________OPM-NEXT: store 
i32 10, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: ret i32 [[TMP3]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test10() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test10() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) ; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________NPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test10() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -378,24 +472,34 @@ define i32 @test10() { } define i32 @test_lifetime() { -; IS________OPM-LABEL: define {{[^@]+}}@test_lifetime() -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; IS________OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; IS________OPM-NEXT: ret i32 [[TMP3]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_lifetime() +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret i32 
[[TMP3]] ; ; IS________NPM-LABEL: define {{[^@]+}}@test_lifetime() ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS________NPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) ; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; IS________NPM-NEXT: store i32 10, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_lifetime() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @no_sync_func(i8* %1) @@ -410,11 +514,17 @@ define i32 @test_lifetime() { ; TEST 11 define void @test11() { -; CHECK-LABEL: define {{[^@]+}}@test11() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test11() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: tail call void @sync_will_return(i8* noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test11() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @sync_will_return(i8* noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) tail call void @sync_will_return(i8* %1) @@ -424,37 +534,37 @@ define void @test11() { ; TEST 12 define i32 @irreducible_cfg(i32 %0) { -; IS________OPM-LABEL: define {{[^@]+}}@irreducible_cfg -; IS________OPM-SAME: (i32 [[TMP0:%.*]]) -; IS________OPM-NEXT: [[TMP2:%.*]] = call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* -; IS________OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 -; IS________OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] -; IS________OPM: 5: -; IS________OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 -; IS________OPM-NEXT: br label [[TMP13:%.*]] -; IS________OPM: 7: -; IS________OPM-NEXT: br label [[TMP8:%.*]] -; IS________OPM: 8: -; IS________OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] -; 
IS________OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 -; IS________OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 -; IS________OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] -; IS________OPM: 12: -; IS________OPM-NEXT: br label [[TMP13]] -; IS________OPM: 13: -; IS________OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] -; IS________OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 -; IS________OPM-NEXT: br label [[TMP8]] -; IS________OPM: 15: -; IS________OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8* -; IS________OPM-NEXT: call void @free(i8* nocapture [[TMP17]]) -; IS________OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________OPM-NEXT: ret i32 [[TMP18]] +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@irreducible_cfg +; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* +; IS__TUNIT_OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] +; IS__TUNIT_OPM: 5: +; IS__TUNIT_OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 +; IS__TUNIT_OPM-NEXT: br label [[TMP13:%.*]] +; IS__TUNIT_OPM: 7: +; IS__TUNIT_OPM-NEXT: br label [[TMP8:%.*]] +; IS__TUNIT_OPM: 8: +; IS__TUNIT_OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] +; IS__TUNIT_OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 +; IS__TUNIT_OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 +; IS__TUNIT_OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] +; IS__TUNIT_OPM: 12: +; IS__TUNIT_OPM-NEXT: br label [[TMP13]] +; IS__TUNIT_OPM: 13: +; IS__TUNIT_OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] +; IS__TUNIT_OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 +; IS__TUNIT_OPM-NEXT: br label [[TMP8]] +; IS__TUNIT_OPM: 15: +; IS__TUNIT_OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8* +; IS__TUNIT_OPM-NEXT: call void @free(i8* nocapture noundef [[TMP17]]) +; IS__TUNIT_OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__TUNIT_OPM-NEXT: ret i32 [[TMP18]] ; ; IS________NPM-LABEL: define {{[^@]+}}@irreducible_cfg ; IS________NPM-SAME: (i32 [[TMP0:%.*]]) @@ -485,6 +595,38 @@ define i32 @irreducible_cfg(i32 %0) { ; IS________NPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8* ; IS________NPM-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4 ; IS________NPM-NEXT: ret i32 [[TMP17]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@irreducible_cfg +; IS__CGSCC_OPM-SAME: (i32 [[TMP0:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32* +; IS__CGSCC_OPM-NEXT: store i32 10, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]] +; IS__CGSCC_OPM: 5: +; IS__CGSCC_OPM-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5 +; 
IS__CGSCC_OPM-NEXT: br label [[TMP13:%.*]] +; IS__CGSCC_OPM: 7: +; IS__CGSCC_OPM-NEXT: br label [[TMP8:%.*]] +; IS__CGSCC_OPM: 8: +; IS__CGSCC_OPM-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ] +; IS__CGSCC_OPM-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1 +; IS__CGSCC_OPM-NEXT: store i32 [[TMP10]], i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0 +; IS__CGSCC_OPM-NEXT: br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]] +; IS__CGSCC_OPM: 12: +; IS__CGSCC_OPM-NEXT: br label [[TMP13]] +; IS__CGSCC_OPM: 13: +; IS__CGSCC_OPM-NEXT: [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ] +; IS__CGSCC_OPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 +; IS__CGSCC_OPM-NEXT: br label [[TMP8]] +; IS__CGSCC_OPM: 15: +; IS__CGSCC_OPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8* +; IS__CGSCC_OPM-NEXT: call void @free(i8* nocapture noundef [[TMP17]]) +; IS__CGSCC_OPM-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4 +; IS__CGSCC_OPM-NEXT: ret i32 [[TMP18]] ; %2 = call noalias i8* @malloc(i64 4) %3 = bitcast i8* %2 to i32* @@ -589,14 +731,23 @@ define i32 @malloc_in_loop(i32 %0) { ; Malloc/Calloc too large define i32 @test13() { -; CHECK-LABEL: define {{[^@]+}}@test13() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 256) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test13() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 256) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test13() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 256) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 256) tail call void @no_sync_func(i8* %1) @@ -608,14 +759,23 @@ define i32 @test13() { } define i32 @test_sle() { -; CHECK-LABEL: define {{[^@]+}}@test_sle() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 -1) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test_sle() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 -1) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test_sle() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 -1) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @malloc(i64 -1) tail call void @no_sync_func(i8* %1) @@ -627,14 +787,23 @@ define i32 @test_sle() { } define i32 @test_overflow() { -; CHECK-LABEL: define {{[^@]+}}@test_overflow() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 65537, i64 65537) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* -; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @free(i8* noalias nocapture nonnull align 4 dereferenceable(4) [[TMP1]]) -; CHECK-NEXT: ret i32 [[TMP3]] +; IS__TUNIT____-LABEL: define {{[^@]+}}@test_overflow() +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 65537, i64 65537) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__TUNIT____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__TUNIT____-NEXT: ret i32 [[TMP3]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test_overflow() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 65537, i64 65537) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; IS__CGSCC____-NEXT: store i32 10, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +; IS__CGSCC____-NEXT: ret i32 [[TMP3]] ; %1 = tail call noalias i8* @calloc(i64 65537, i64 65537) tail call void @no_sync_func(i8* %1) @@ -646,11 +815,17 @@ define i32 @test_overflow() { } define void @test14() { -; CHECK-LABEL: define {{[^@]+}}@test14() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 64, i64 4) -; CHECK-NEXT: tail call 
void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test14() +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @calloc(i64 64, i64 4) +; NOT_CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test14() +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @calloc(i64 64, i64 4) +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @calloc(i64 64, i64 4) tail call void @no_sync_func(i8* %1) @@ -659,12 +834,19 @@ define void @test14() { } define void @test15(i64 %S) { -; CHECK-LABEL: define {{[^@]+}}@test15 -; CHECK-SAME: (i64 [[S:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]]) -; CHECK-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test15 +; IS__TUNIT____-SAME: (i64 [[S:%.*]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 [[S]]) +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test15 +; IS__CGSCC____-SAME: (i64 [[S:%.*]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 [[S]]) +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* noalias nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 %S) tail call void @no_sync_func(i8* %1) @@ -673,20 +855,28 @@ define void @test15(i64 %S) { } define void @test16a(i8 %v, i8** %P) { -; IS________OPM-LABEL: define {{[^@]+}}@test16a -; IS________OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) -; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 -; IS________OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree nonnull dereferenceable(1) [[TMP1]]) -; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture nonnull dereferenceable(1) [[TMP1]]) -; IS________OPM-NEXT: ret void +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test16a +; IS__TUNIT_OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) +; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 +; IS__TUNIT_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@test16a ; IS________NPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) ; IS________NPM-NEXT: [[TMP1:%.*]] = alloca i8, i64 4, align 1 ; 
IS________NPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 -; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree nonnull dereferenceable(1) [[TMP1]]) +; IS________NPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) ; IS________NPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test16a +; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture nofree readnone [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: store i8 [[V]], i8* [[TMP1]], align 1 +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* noalias nocapture nofree noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* noalias nocapture noundef nonnull dereferenceable(1) [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8 %v, i8* %1 @@ -696,13 +886,21 @@ define void @test16a(i8 %v, i8** %P) { } define void @test16b(i8 %v, i8** %P) { -; CHECK-LABEL: define {{[^@]+}}@test16b -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS__TUNIT____-LABEL: define {{[^@]+}}@test16b +; IS__TUNIT____-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT____-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__TUNIT____-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__TUNIT____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__TUNIT____-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test16b +; IS__CGSCC____-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__CGSCC____-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__CGSCC____-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC____-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8* %1, i8** %P @@ -712,13 +910,21 @@ define void @test16b(i8 %v, i8** %P) { } define void @test16c(i8 %v, i8** %P) { -; CHECK-LABEL: define {{[^@]+}}@test16c -; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) -; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test16c +; NOT_CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_OPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; NOT_CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test16c +; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]], i8** nocapture writeonly [[P:%.*]]) +; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = tail call 
noalias i8* @malloc(i64 4) +; IS__CGSCC_OPM-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 +; IS__CGSCC_OPM-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: tail call void @free(i8* nocapture noundef [[TMP1]]) +; IS__CGSCC_OPM-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) store i8* %1, i8** %P diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll index d4e27d24bc5535..b71c07bd402093 100644 --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -96,8 +96,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree readonly align 4 [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; IS__TUNIT____-NEXT: ret i32 [[ADD]] ; @@ -107,8 +107,8 @@ define dso_local i32 @visible_local(i32* %A) #0 { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32 @noalias_args(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32 @noalias_args_argmem(i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL1]], [[CALL2]] ; IS__CGSCC____-NEXT: ret i32 [[ADD]] ; @@ -124,7 +124,7 @@ entry: define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree noinline nosync nounwind readonly uwtable willreturn ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@noalias_args_argmem_ro -; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* noalias nocapture nofree 
noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT_OPM-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 4 ; IS__TUNIT_OPM-NEXT: [[T1:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT_OPM-NEXT: [[ADD:%.*]] = add nsw i32 [[T0]], [[T1]] @@ -144,7 +144,7 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind readonly uwtable willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_args_argmem_ro -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A:%.*]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[T0:%.*]] = load i32, i32* [[A]], align 4 ; IS__CGSCC____-NEXT: [[T1:%.*]] = load i32, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: [[ADD:%.*]] = add nsw i32 [[T0]], [[T1]] @@ -161,7 +161,7 @@ define i32 @visible_local_2() { ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@visible_local_2() ; IS__TUNIT_OPM-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT_OPM-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__TUNIT_OPM-NEXT: ret i32 [[CALL]] ; ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn @@ -177,7 +177,7 @@ define i32 @visible_local_2() { ; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_2() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 @@ -189,14 +189,14 @@ define i32 @visible_local_2() { define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { ; IS__TUNIT____: Function Attrs: argmemonly nofree noinline nosync nounwind uwtable willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; IS__TUNIT____-SAME: (i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) ; IS__TUNIT____-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; IS__TUNIT____-NEXT: store i32 0, i32* [[B]], align 4 ; IS__TUNIT____-NEXT: ret i32 [[T0]] ; ; IS__CGSCC____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind uwtable willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull align 4 
dereferenceable(4) [[B:%.*]]) +; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) ; IS__CGSCC____-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: store i32 0, i32* [[B]], align 4 ; IS__CGSCC____-NEXT: ret i32 [[T0]] @@ -211,14 +211,14 @@ define i32 @visible_local_3() { ; IS__TUNIT____-LABEL: define {{[^@]+}}@visible_local_3() ; IS__TUNIT____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) ; IS__TUNIT____-NEXT: ret i32 [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@visible_local_3() ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree nonnull align 4 dereferenceable(4) [[B]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B]]) ; IS__CGSCC____-NEXT: ret i32 [[CALL]] ; %B = alloca i32, align 4 diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index f3bd7ef1460a85..62d195a1f66bbf 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1758,12 +1758,12 @@ define void @call_via_pointer_with_dead_args(i32* %a, i32* %b, void (i32*, i32*, ; FIXME: We have to prevent the propagation of %fp in the new pm CGSCC pass until the CallGraphUpdater can handle the new call edge. 
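; [Editorial aside, not part of the original patch] The hunks above and below
; all follow one pattern: whenever the Attributor can prove a call-site
; argument is a well-defined value (neither undef nor poison), e.g. a fresh
; alloca or a known function pointer, it now manifests `noundef` next to the
; attributes it already deduced (`nonnull`, `align`, `dereferenceable`, ...).
; A minimal sketch of that shape, with hypothetical names (@sink and @caller
; are assumptions for illustration, not taken from these tests):
;
;   define void @caller() {
;     %slot = alloca i32, align 128   ; fresh alloca: defined and nonnull
;     call void @sink(i32* %slot)     ; expected to manifest roughly as:
;     ret void                        ;   @sink(i32* noundef nonnull align 128
;   }                                 ;         dereferenceable(4) %slot)
;   declare void @sink(i32*)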
define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a -; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; NOT_CGSCC_NPM-NEXT: call void @called_via_pointer(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; IS__CGSCC____-NEXT: ret void ; @@ -1772,12 +1772,12 @@ define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* % } define internal void @call_via_pointer_with_dead_args_internal_b(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b -; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; NOT_CGSCC_NPM-NEXT: call void @called_via_pointer_internal_2(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull [[FP:%.*]]) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) ; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) ; IS__CGSCC____-NEXT: ret void ; @@ -1791,10 +1791,10 @@ define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) { ; NOT_CGSCC_NPM-NEXT: [[PTR2:%.*]] = alloca i32, align 128 ; NOT_CGSCC_NPM-NEXT: [[PTR3:%.*]] = alloca i32, align 128 ; NOT_CGSCC_NPM-NEXT: [[PTR4:%.*]] = alloca i32, align 128 -; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer) -; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer_internal_1) -; NOT_CGSCC_NPM-NEXT: call void 
@call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer) -; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree @called_via_pointer_internal_2) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_1) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer) +; NOT_CGSCC_NPM-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_2) ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller @@ -1803,10 +1803,10 @@ define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) { ; IS__CGSCC____-NEXT: [[PTR2:%.*]] = alloca i32, align 128 ; IS__CGSCC____-NEXT: [[PTR3:%.*]] = alloca i32, align 128 ; IS__CGSCC____-NEXT: [[PTR4:%.*]] = alloca i32, align 128 -; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_1) -; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer) -; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree nonnull @called_via_pointer_internal_2) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer_internal_1) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull 
@called_via_pointer_internal_2) ; IS__CGSCC____-NEXT: ret void ; %ptr1 = alloca i32, align 128 @@ -1997,7 +1997,7 @@ define void @bad_gep() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = alloca i8, align 1 ; CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* noalias nocapture nonnull dereferenceable(1) [[N]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* noalias nocapture noundef nonnull dereferenceable(1) [[N]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: while.body: ; CHECK-NEXT: unreachable @@ -2006,7 +2006,7 @@ define void @bad_gep() { ; CHECK: if.end: ; CHECK-NEXT: unreachable ; CHECK: exit: -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* noalias nocapture nonnull dereferenceable(1) [[N]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* noalias nocapture noundef nonnull dereferenceable(1) [[N]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index 23bff129101805..aeb66e9dbe4a20 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -327,7 +327,7 @@ define void @callerB1() { ; CHECK: Function Attrs: readnone ; CHECK-LABEL: define {{[^@]+}}@callerB1() ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* nonnull dereferenceable(1) [[STACK]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef nonnull dereferenceable(1) [[STACK]]) ; CHECK-NEXT: ret void ; %stack = alloca i8 @@ -338,7 +338,7 @@ define void @callerB2() { ; CHECK: Function Attrs: inaccessiblememonly ; CHECK-LABEL: define {{[^@]+}}@callerB2() ; CHECK-NEXT: [[STACK:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* nonnull dereferenceable(1) [[STACK]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef nonnull dereferenceable(1) [[STACK]]) ; CHECK-NEXT: ret void ; %stack = alloca i8 @@ -346,20 +346,30 @@ define void @callerB2() { ret void } define void @callerC1() { -; CHECK-LABEL: define {{[^@]+}}@callerC1() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* [[UNKNOWN]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@callerC1() +; NOT_CGSCC_NPM-NEXT: [[UNKNOWN:%.*]] = call noundef i8* @unknown_ptr() +; NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef [[UNKNOWN]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callerC1() +; IS__CGSCC_NPM-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() +; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = call i8* @argmem_only(i8* noundef [[UNKNOWN]]) +; IS__CGSCC_NPM-NEXT: ret void ; %unknown = call i8* @unknown_ptr() call i8* @argmem_only(i8* %unknown) ret void } define void @callerC2() { -; CHECK-LABEL: define {{[^@]+}}@callerC2() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* [[UNKNOWN]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@callerC2() +; NOT_CGSCC_OPM-NEXT: [[UNKNOWN:%.*]] = call noundef i8* @unknown_ptr() +; NOT_CGSCC_OPM-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef [[UNKNOWN]]) +; NOT_CGSCC_OPM-NEXT: ret void +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@callerC2() +; IS__CGSCC_OPM-NEXT: [[UNKNOWN:%.*]] = call i8* @unknown_ptr() +; 
IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noundef [[UNKNOWN]]) +; IS__CGSCC_OPM-NEXT: ret void ; %unknown = call i8* @unknown_ptr() call i8* @inaccesible_argmem_only_decl(i8* %unknown) @@ -367,7 +377,7 @@ define void @callerC2() { } define void @callerD1() { ; CHECK-LABEL: define {{[^@]+}}@callerD1() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @argmem_only(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]], align 1 ; CHECK-NEXT: ret void ; @@ -377,7 +387,7 @@ define void @callerD1() { } define void @callerD2() { ; CHECK-LABEL: define {{[^@]+}}@callerD2() -; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: [[UNKNOWN:%.*]] = call i8* @inaccesible_argmem_only_decl(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: store i8 0, i8* [[UNKNOWN]], align 1 ; CHECK-NEXT: ret void ; @@ -464,12 +474,12 @@ define void @writeonly_global() { define void @writeonly_global_via_arg() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@writeonly_global_via_arg() -; IS__TUNIT____-NEXT: call void @write_global_via_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) @G) +; IS__TUNIT____-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@writeonly_global_via_arg() -; IS__CGSCC____-NEXT: call void @write_global_via_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) @G) +; IS__CGSCC____-NEXT: call void @write_global_via_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) @G) ; IS__CGSCC____-NEXT: ret void ; call void @write_global_via_arg(i32* @G) @@ -499,7 +509,7 @@ define i8 @recursive_not_readnone(i8* %ptr, i1 %c) { ; CHECK-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; CHECK-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; CHECK-NEXT: ret i8 [[R]] ; CHECK: f: @@ -520,11 +530,11 @@ f: define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal -; IS__TUNIT____-SAME: (i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) +; IS__TUNIT____-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) ; IS__TUNIT____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias 
nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__TUNIT____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__TUNIT____-NEXT: ret i8 [[R]] ; IS__TUNIT____: f: @@ -533,11 +543,11 @@ define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) { ; ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal -; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) +; IS__CGSCC____-SAME: (i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) ; IS__CGSCC____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__CGSCC____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__CGSCC____-NEXT: ret i8 [[R]] ; IS__CGSCC____: f: @@ -560,7 +570,7 @@ define i8 @readnone_caller(i1 %c) { ; CHECK-LABEL: define {{[^@]+}}@readnone_caller ; CHECK-SAME: (i1 [[C:%.*]]) ; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) +; CHECK-NEXT: [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) ; CHECK-NEXT: ret i8 [[R]] ; %a = alloca i8 @@ -575,7 +585,7 @@ define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) { ; IS__TUNIT____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__TUNIT____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__TUNIT____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__TUNIT____-NEXT: ret i8 [[R]] ; IS__TUNIT____: f: @@ -588,7 +598,7 @@ define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) { ; IS__CGSCC____-NEXT: [[ALLOC:%.*]] = alloca i8, align 1 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 false) ; IS__CGSCC____-NEXT: [[R:%.*]] = load i8, i8* [[ALLOC]], align 1 ; IS__CGSCC____-NEXT: ret i8 [[R]] ; IS__CGSCC____: f: diff --git a/llvm/test/Transforms/Attributor/misc.ll b/llvm/test/Transforms/Attributor/misc.ll index 80a6948ca6dc4c..3ab1f8543aeb26 100644 --- a/llvm/test/Transforms/Attributor/misc.ll +++ b/llvm/test/Transforms/Attributor/misc.ll @@ -13,10 +13,10 @@ define internal void @internal(void (i8*)* %fp) { ; CHECK-SAME: (void (i8*)* nonnull [[FP:%.*]]) ; 
CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) -; CHECK-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; CHECK-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) ; CHECK-NEXT: call void @callback2(void (i8*)* nonnull [[FP]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void [[FP]](i8* [[TMP1]]) @@ -42,9 +42,9 @@ define void @external(void (i8*)* %fp) { ; CHECK-SAME: (void (i8*)* [[FP:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void @callback1(void (i32*)* nonnull @foo) -; CHECK-NEXT: call void @callback2(void (i8*)* bitcast (void (i32*)* @foo to void (i8*)*)) +; CHECK-NEXT: call void @foo(i32* noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void @callback1(void (i32*)* noundef nonnull @foo) +; CHECK-NEXT: call void @callback2(void (i8*)* noundef bitcast (void (i32*)* @foo to void (i8*)*)) ; CHECK-NEXT: call void @callback2(void (i8*)* [[FP]]) ; CHECK-NEXT: call void [[FP]](i8* bitcast (void (i32*)* @foo to i8*)) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* diff --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll index b49cbd94322d16..e420f58af13683 100644 --- a/llvm/test/Transforms/Attributor/misc_crash.ll +++ b/llvm/test/Transforms/Attributor/misc_crash.ll @@ -29,7 +29,7 @@ define i32* @func1() { } ; UTC_ARGS: --disable -; CHECK-LABEL: define internal nonnull align 4 dereferenceable(4) i32* @func1a() +; CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) i32* @func1a() ; CHECK-NEXT: ret i32* getelementptr inbounds ([1 x i32], [1 x i32]* @var1, i32 0, i32 0) define internal i32* @func1a([1 x i32]* %arg) { %ptr = getelementptr inbounds [1 x i32], [1 x i32]* %arg, i64 0, i64 0 @@ -40,7 +40,7 @@ define internal i32* @func1a([1 x i32]* %arg) { define internal void @func2a(i32* %0) { ; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly ; CHECK-LABEL: define {{[^@]+}}@func2a -; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) +; CHECK-SAME: (i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[TMP0:%.*]]) ; CHECK-NEXT: store i32 0, i32* @var2, align 4 ; CHECK-NEXT: ret void ; @@ -50,7 +50,7 @@ define internal void @func2a(i32* %0) { define i32 @func2() { ; CHECK-LABEL: define {{[^@]+}}@func2() -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* nonnull align 4 dereferenceable(4) @var2) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) 
bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* noundef nonnull align 4 dereferenceable(4) @var2) ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @var2, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] ; @@ -62,7 +62,7 @@ define i32 @func2() { define i32 @func3(i1 %false) { ; CHECK-LABEL: define {{[^@]+}}@func3 ; CHECK-SAME: (i1 [[FALSE:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* nonnull align 4 dereferenceable(4) @var2) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 (i32*, ...) bitcast (void (i32*)* @func2a to i32 (i32*, ...)*)(i32* noundef nonnull align 4 dereferenceable(4) @var2) ; CHECK-NEXT: br i1 [[FALSE]], label [[USE_BB:%.*]], label [[RET_BB:%.*]] ; CHECK: use_bb: ; CHECK-NEXT: ret i32 [[TMP1]] diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll index 5314ea53f5a940..ff780b6714259b 100644 --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -56,9 +56,13 @@ define void @nocapture(i8* %a){ } define i8* @return_noalias_looks_like_capture(){ -; CHECK-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: ret i8* [[TMP1]] +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() +; NOT_CGSCC_NPM-NEXT: [[TMP1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: ret i8* [[TMP1]] +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@return_noalias_looks_like_capture() +; IS__CGSCC____-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: ret i8* [[TMP1]] ; %1 = tail call noalias i8* @malloc(i64 4) call void @nocapture(i8* %1) @@ -180,7 +184,7 @@ define i8* @test6() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nocapture nonnull dereferenceable(2) [[ARRAYIDX]]) +; CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @strdup(i8* nocapture noundef nonnull dereferenceable(2) [[ARRAYIDX]]) ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -254,7 +258,7 @@ define i8* @test8(i32* %0) nounwind uwtable { declare void @use_i8(i8* nocapture) define internal void @test9a(i8* %a, i8* %b) { ; CHECK-LABEL: define {{[^@]+}}@test9a() -; CHECK-NEXT: call void @use_i8(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: call void @use_i8(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: ret void ; call void @use_i8(i8* null) @@ -353,14 +357,23 @@ define void @test11(i8* noalias %a) { declare void @use_nocapture(i8* nocapture) declare void @use(i8*) define void @test12_1() { -; CHECK-LABEL: define {{[^@]+}}@test12_1() -; CHECK-NEXT: [[A:%.*]] = alloca i8, align 4 -; CHECK-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture nonnull align 4 dereferenceable(1) [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[B]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_1() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = alloca i8, align 4 +; NOT_CGSCC_NPM-NEXT: 
[[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_1() +; IS__CGSCC____-NEXT: [[A:%.*]] = alloca i8, align 4 +; IS__CGSCC____-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef nonnull align 4 dereferenceable(1) [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: ret void ; %A = alloca i8, align 4 %B = tail call noalias i8* @malloc(i64 4) @@ -372,13 +385,21 @@ define void @test12_1() { } define void @test12_2(){ -; CHECK-LABEL: define {{[^@]+}}@test12_2() -; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* noalias nocapture [[A]]) -; CHECK-NEXT: tail call void @use(i8* [[A]]) -; CHECK-NEXT: tail call void @use_nocapture(i8* nocapture [[A]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_2() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use(i8* noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_nocapture(i8* nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* noalias nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use(i8* noundef [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_nocapture(i8* nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: ret void ; ; FIXME: This should be @use_nocapture(i8* noalias [[A]]) ; FIXME: This should be @use_nocapture(i8* noalias nocapture [[A]]) @@ -392,10 +413,15 @@ define void @test12_2(){ declare void @two_args(i8* nocapture , i8* nocapture) define void @test12_3(){ -; CHECK-LABEL: define {{[^@]+}}@test12_3() -; CHECK-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test12_3() +; NOT_CGSCC_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_3() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; 
IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A]]) +; IS__CGSCC____-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) tail call void @two_args(i8* %A, i8* %A) @@ -404,28 +430,40 @@ define void @test12_3(){ define void @test12_4(){ ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() -; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS________OPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS________OPM-NEXT: [[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) ; IS________OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 ; IS________OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 ; IS________OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[B]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture [[A_1]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) ; IS________OPM-NEXT: ret void ; -; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() -; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) -; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 -; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) -; NOT_TUNIT_OPM-NEXT: ret void +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test12_4() +; IS__TUNIT_NPM-NEXT: [[A:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_NPM-NEXT: [[B:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; IS__TUNIT_NPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS__TUNIT_NPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS__TUNIT_NPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* noalias nocapture noundef [[A]], i8* noalias nocapture noundef [[B]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture [[A_1]]) +; IS__TUNIT_NPM-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) +; IS__TUNIT_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_4() +; IS__CGSCC____-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 4) +; 
IS__CGSCC____-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 +; IS__CGSCC____-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; IS__CGSCC____-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 +; IS__CGSCC____-NEXT: tail call void @two_args(i8* noalias nocapture noundef [[A]], i8* noalias nocapture noundef [[B]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_0]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A]], i8* nocapture noundef [[A_1]]) +; IS__CGSCC____-NEXT: tail call void @two_args(i8* nocapture noundef [[A_0]], i8* nocapture noundef [[B_0]]) +; IS__CGSCC____-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) %B = tail call noalias i8* @malloc(i64 4) @@ -456,12 +494,19 @@ define void @use_i8_internal(i8* %a) { } define void @test13_use_noalias(){ -; CHECK-LABEL: define {{[^@]+}}@test13_use_noalias() -; CHECK-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) -; CHECK-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; CHECK-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; CHECK-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test13_use_noalias() +; NOT_CGSCC_NPM-NEXT: [[M1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) +; NOT_CGSCC_NPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; NOT_CGSCC_NPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* +; NOT_CGSCC_NPM-NEXT: call void @use_i8_internal(i8* noalias nocapture noundef [[C2]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@test13_use_noalias() +; IS__CGSCC____-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; IS__CGSCC____-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; IS__CGSCC____-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* +; IS__CGSCC____-NEXT: call void @use_i8_internal(i8* noalias nocapture noundef [[C2]]) +; IS__CGSCC____-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() ; IS__CGSCC_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) @@ -478,12 +523,12 @@ define void @test13_use_noalias(){ define void @test13_use_alias(){ ; CHECK-LABEL: define {{[^@]+}}@test13_use_alias() -; CHECK-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 4) +; CHECK-NEXT: [[M1:%.*]] = tail call noalias noundef i8* @malloc(i64 4) ; CHECK-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* ; CHECK-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* ; CHECK-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* -; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) -; CHECK-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture noundef [[C2A]]) +; CHECK-NEXT: call void @use_i8_internal(i8* nocapture noundef [[C2B]]) ; CHECK-NEXT: ret void ; %m1 = tail call noalias i8* @malloc(i64 4) @@ -570,11 +615,11 @@ define internal fastcc double @strtox(i8* %s, i8** %p, i32 %prec) unnamed_addr { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[F:%.*]] = alloca [[STRUCT__IO_FILE:%.*]], align 8 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct._IO_FILE* [[F]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) ; CHECK-NEXT: [[CALL:%.*]] = call i32 bitcast (i32 (...)* @sh_fromstring to 
i32 (%struct._IO_FILE*, i8*)*)(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i8* [[S]]) -; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i64 0) -; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* nonnull align 8 dereferenceable(240) [[F]], i32 1, i32 1) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture nonnull align 8 dereferenceable(240) [[TMP0]]) +; CHECK-NEXT: call void @__shlim(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i64 0) +; CHECK-NEXT: [[CALL1:%.*]] = call double @__floatscan(%struct._IO_FILE* noundef nonnull align 8 dereferenceable(240) [[F]], i32 1, i32 1) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 144, i8* nocapture noundef nonnull align 8 dereferenceable(240) [[TMP0]]) ; CHECK-NEXT: ret double [[CALL1]] ; entry: diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 58d8be6d60c9b5..4ea6a327db6e84 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -459,7 +459,7 @@ define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2, i1 %c) { ; CHECK-SAME: (i8* nocapture nofree readnone [[X4_2:%.*]], i8* nofree readnone returned "no-capture-maybe-returned" [[Y4_2:%.*]], i8* nocapture nofree readnone [[Z4_2:%.*]], i1 [[C:%.*]]) ; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; CHECK: t: -; CHECK-NEXT: call void @test4_1(i8* noalias nocapture nofree readnone align 536870912 null, i1 [[C]]) +; CHECK-NEXT: call void @test4_1(i8* noalias nocapture nofree noundef readnone align 536870912 null, i1 [[C]]) ; CHECK-NEXT: store i32* null, i32** @g, align 8 ; CHECK-NEXT: br label [[F]] ; CHECK: f: @@ -759,7 +759,7 @@ declare void @unknown(i8*) define void @test_callsite() { ; CHECK-LABEL: define {{[^@]+}}@test_callsite() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @unknown(i8* noalias nocapture align 536870912 null) +; CHECK-NEXT: call void @unknown(i8* noalias nocapture noundef align 536870912 null) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll index 5ed73b74c8693b..c4bc297ee2ad76 100644 --- a/llvm/test/Transforms/Attributor/nocapture-2.ll +++ b/llvm/test/Transforms/Attributor/nocapture-2.ll @@ -217,11 +217,11 @@ define float* @scc_A(i32* dereferenceable_or_null(4) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i16* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) +; CHECK-NEXT: [[CALL:%.*]] = call noundef dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32* -; CHECK-NEXT: [[CALL2:%.*]] = call float* @scc_A(i32* noalias nofree nonnull readnone 
dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL2:%.*]] = call float* @scc_A(i32* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32* ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: @@ -263,11 +263,11 @@ define i64* @scc_B(double* dereferenceable_or_null(8) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i32* -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) +; CHECK-NEXT: [[CALL:%.*]] = call noundef dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[CALL]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16* -; CHECK-NEXT: [[CALL2:%.*]] = call i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL2:%.*]] = call i8* @scc_C(i16* noalias nofree noundef nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[A]] to i8* @@ -312,16 +312,16 @@ define i8* @scc_C(i16* dereferenceable_or_null(2) %a) { ; CHECK-NEXT: br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[A]] to double* -; CHECK-NEXT: [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) +; CHECK-NEXT: [[CALL1:%.*]] = call noundef dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree nonnull readnone dereferenceable(8) "no-capture-maybe-returned" [[TMP0]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8* ; CHECK-NEXT: br label [[COND_END:%.*]] ; CHECK: cond.false: -; CHECK-NEXT: [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[A]]) +; CHECK-NEXT: [[CALL2:%.*]] = call noundef dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[A]]) ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: ; CHECK-NEXT: [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[COND]] to i32* -; CHECK-NEXT: [[CALL3:%.*]] = call float* @scc_A(i32* noalias nofree nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP2]]) +; CHECK-NEXT: [[CALL3:%.*]] = call float* @scc_A(i32* noalias nofree noundef nonnull readnone dereferenceable(4) "no-capture-maybe-returned" [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8* ; CHECK-NEXT: ret i8* [[TMP3]] ; diff --git a/llvm/test/Transforms/Attributor/nonnull.ll 
b/llvm/test/Transforms/Attributor/nonnull.ll index 4add5a5c1f5a89..6e06b3b195204f 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -364,10 +364,10 @@ define void @test12(i8* nonnull %a) { declare i8* @unknown() define void @test13_helper() { ; CHECK-LABEL: define {{[^@]+}}@test13_helper() -; CHECK-NEXT: [[NONNULLPTR:%.*]] = tail call nonnull i8* @ret_nonnull() -; CHECK-NEXT: [[MAYBENULLPTR:%.*]] = tail call i8* @unknown() -; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]]) -; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree readnone [[MAYBENULLPTR]], i8* noalias nocapture nofree nonnull readnone [[NONNULLPTR]]) +; CHECK-NEXT: [[NONNULLPTR:%.*]] = tail call noundef nonnull i8* @ret_nonnull() +; CHECK-NEXT: [[MAYBENULLPTR:%.*]] = tail call noundef i8* @unknown() +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree noundef readnone [[MAYBENULLPTR]]) +; CHECK-NEXT: tail call void @test13(i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]], i8* noalias nocapture nofree noundef readnone [[MAYBENULLPTR]], i8* noalias nocapture nofree noundef nonnull readnone [[NONNULLPTR]]) ; CHECK-NEXT: ret void ; %nonnullptr = tail call i8* @ret_nonnull() @@ -379,10 +379,10 @@ define void @test13_helper() { define internal void @test13(i8* %a, i8* %b, i8* %c) { ; IS__TUNIT____: Function Attrs: nounwind ; IS__TUNIT____-LABEL: define {{[^@]+}}@test13 -; IS__TUNIT____-SAME: (i8* noalias nocapture nofree nonnull readnone [[A:%.*]], i8* noalias nocapture nofree readnone [[B:%.*]], i8* noalias nocapture nofree readnone [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree nonnull readnone [[A]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree readnone [[B]]) -; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree readnone [[C]]) +; IS__TUNIT____-SAME: (i8* noalias nocapture nofree noundef nonnull readnone [[A:%.*]], i8* noalias nocapture nofree noundef readnone [[B:%.*]], i8* noalias nocapture nofree noundef readnone [[C:%.*]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef nonnull readnone [[A]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef readnone [[B]]) +; IS__TUNIT____-NEXT: call void @use_i8_ptr(i8* noalias nocapture nofree noundef readnone [[C]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nounwind diff --git a/llvm/test/Transforms/Attributor/noreturn_async.ll b/llvm/test/Transforms/Attributor/noreturn_async.ll index 879fb16a13d0bf..6c3526baba2bb7 100644 --- a/llvm/test/Transforms/Attributor/noreturn_async.ll +++ b/llvm/test/Transforms/Attributor/noreturn_async.ll @@ -86,7 +86,7 @@ entry: ; CHECK-NOT: nounwind ; CHECK-NEXT: define ; CHECK-NEXT: entry: -; CHECK-NEXT: %call3 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0)) +; CHECK-NEXT: %call3 = call i32 (i8*, ...) 
@printf(i8* noundef nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0))
 ; CHECK-NEXT:   call void @"?overflow@@YAXXZ_may_throw"()
 ; CHECK-NEXT:   unreachable
   %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0))
diff --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll
index 22b675427cf01f..0321b0ceafd76e 100644
--- a/llvm/test/Transforms/Attributor/noreturn_sync.ll
+++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll
@@ -82,7 +82,7 @@ entry:
 ; CHECK-NOT: nounwind
 ; CHECK-NEXT: define
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %call3 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0))
+; CHECK-NEXT:   %call3 = call i32 (i8*, ...) @printf(i8* noundef nonnull dereferenceable(18) getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0))
 ; CHECK-NEXT:   call void @"?overflow@@YAXXZ_may_throw"()
 ; CHECK-NEXT:   unreachable
   %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @"??_C@_0BC@NKPAGFFJ@Exception?5caught?6?$AA@", i64 0, i64 0))
diff --git a/llvm/test/Transforms/Attributor/nosync.ll b/llvm/test/Transforms/Attributor/nosync.ll
index 1404cc4b34c23a..102b2e86ac2132 100644
--- a/llvm/test/Transforms/Attributor/nosync.ll
+++ b/llvm/test/Transforms/Attributor/nosync.ll
@@ -459,7 +459,7 @@ declare void @llvm.x86.sse2.clflush(i8*)
 define void @i_totally_sync() {
 ; CHECK: Function Attrs: nounwind
 ; CHECK-LABEL: define {{[^@]+}}@i_totally_sync()
-; CHECK-NEXT:    tail call void @llvm.x86.sse2.clflush(i8* nonnull align 4 dereferenceable(4) bitcast (i32* @a to i8*))
+; CHECK-NEXT:    tail call void @llvm.x86.sse2.clflush(i8* noundef nonnull align 4 dereferenceable(4) bitcast (i32* @a to i8*))
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.x86.sse2.clflush(i8* bitcast (i32* @a to i8*))
diff --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll
new file mode 100644
index 00000000000000..b7c1d45205a607
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/noundef.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
+
+declare void @unknown()
+
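+; [Editorial aside, not part of the upstream file] @foo below is the minimal
+; positive case for this deduction: %x is a fresh alloca, so the pointer
+; handed to @bar is known to be neither undef nor poison, and the check line
+; expects `noundef` to be manifested together with `nonnull`, `align 4`, and
+; `dereferenceable(4)` on the call-site argument. The intervening call to
+; @unknown() checks that an opaque call does not invalidate the deduction.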
+declare void @bar(i32*) + +define void @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @bar(i32* noundef nonnull align 4 dereferenceable(4) [[X]]) +; CHECK-NEXT: ret void +; + %x = alloca i32 + call void @unknown() + call void @bar(i32* %x) + ret void +} diff --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll index 7c114500328003..701b70926aaa2d 100644 --- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll +++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll @@ -78,11 +78,11 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) { ; IS__TUNIT____-NEXT: store i32 3, i32* [[R0]], align 4 ; IS__TUNIT____-NEXT: store i32 5, i32* [[R1]], align 4 ; IS__TUNIT____-NEXT: store i32 1, i32* [[W0]], align 4 -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__TUNIT____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) ; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: return: @@ -103,11 +103,11 @@ define internal i32* 
@internal_ret0_nw(i32* %n0, i32* %w0) { ; IS__CGSCC____-NEXT: store i32 3, i32* [[R0]], align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[R1]], align 4 ; IS__CGSCC____-NEXT: store i32 1, i32* [[W0]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) -; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) +; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: return: diff --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll index 2f2c18d293ba0f..37381026ab354a 100644 --- a/llvm/test/Transforms/Attributor/readattrs.ll +++ b/llvm/test/Transforms/Attributor/readattrs.ll @@ -241,7 +241,7 @@ define void @unsound_readnone(i8* %ignored, i8* %escaped_then_written) { ; CHECK-LABEL: define {{[^@]+}}@unsound_readnone ; CHECK-SAME: (i8* nocapture nofree readnone [[IGNORED:%.*]], i8* [[ESCAPED_THEN_WRITTEN:%.*]]) ; CHECK-NEXT: [[ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: call void @escape_readnone_ptr(i8** nonnull align 8 dereferenceable(8) [[ADDR]], i8* noalias readnone [[ESCAPED_THEN_WRITTEN]]) +; CHECK-NEXT: call void @escape_readnone_ptr(i8** noundef nonnull align 8 dereferenceable(8) [[ADDR]], i8* noalias readnone [[ESCAPED_THEN_WRITTEN]]) ; CHECK-NEXT: [[ADDR_LD:%.*]] = load i8*, i8** [[ADDR]], align 8 ; 
CHECK-NEXT: store i8 0, i8* [[ADDR_LD]], align 1 ; CHECK-NEXT: ret void @@ -257,7 +257,7 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { ; CHECK-LABEL: define {{[^@]+}}@unsound_readonly ; CHECK-SAME: (i8* nocapture nofree readnone [[IGNORED:%.*]], i8* [[ESCAPED_THEN_WRITTEN:%.*]]) ; CHECK-NEXT: [[ADDR:%.*]] = alloca i8*, align 8 -; CHECK-NEXT: call void @escape_readonly_ptr(i8** nonnull align 8 dereferenceable(8) [[ADDR]], i8* readonly [[ESCAPED_THEN_WRITTEN]]) +; CHECK-NEXT: call void @escape_readonly_ptr(i8** noundef nonnull align 8 dereferenceable(8) [[ADDR]], i8* readonly [[ESCAPED_THEN_WRITTEN]]) ; CHECK-NEXT: [[ADDR_LD:%.*]] = load i8*, i8** [[ADDR]], align 8 ; CHECK-NEXT: store i8 0, i8* [[ADDR_LD]], align 1 ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll index b0007746592de5..2df7eebc6c0c59 100644 --- a/llvm/test/Transforms/Attributor/returned.ll +++ b/llvm/test/Transforms/Attributor/returned.ll @@ -314,8 +314,8 @@ define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_scc_r1 ; IS__TUNIT____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) ; IS__TUNIT____-NEXT: entry: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noundef double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__TUNIT____-NEXT: ret double* [[CALL1]] ; ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable @@ -323,7 +323,7 @@ define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 { ; IS__CGSCC____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__CGSCC____-NEXT: ret double* [[CALL1]] ; entry: @@ -340,20 +340,20 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__TUNIT____-NEXT: [[CMP:%.*]] = icmp ugt double* [[A]], [[B]] ; IS__TUNIT____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; IS__TUNIT____: if.then: -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) -; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call noundef double* 
@ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) +; IS__TUNIT____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__TUNIT____-NEXT: br label [[RETURN:%.*]] ; IS__TUNIT____: if.end: ; IS__TUNIT____-NEXT: [[CMP2:%.*]] = icmp ult double* [[A]], [[B]] ; IS__TUNIT____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__TUNIT____: if.then3: -; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) -; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) -; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) -; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) -; IS__TUNIT____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__TUNIT____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL4:%.*]] = call noundef double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) +; IS__TUNIT____-NEXT: [[CALL5:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL6:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) +; IS__TUNIT____-NEXT: [[CALL7:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL6]], double* noalias nocapture nofree readnone undef) +; IS__TUNIT____-NEXT: [[CALL8:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) +; IS__TUNIT____-NEXT: [[CALL9:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree noundef readnone [[CALL5]], double* noalias nofree noundef readnone [[CALL7]], double* noalias nofree noundef readnone [[CALL8]]) +; IS__TUNIT____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree noundef readnone [[CALL4]], double* noalias nofree noundef readnone [[CALL9]], double* noalias nocapture nofree noundef readnone undef) ; IS__TUNIT____-NEXT: br label [[RETURN]] ; IS__TUNIT____: if.end12: ; IS__TUNIT____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -378,19 +378,19 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 { ; IS__CGSCC____-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; IS__CGSCC____: if.then: ; IS__CGSCC____-NEXT: [[CALL:%.*]] = call double* 
@ptr_sink_r0(double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) +; IS__CGSCC____-NEXT: [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL]]) ; IS__CGSCC____-NEXT: br label [[RETURN:%.*]] ; IS__CGSCC____: if.end: ; IS__CGSCC____-NEXT: [[CMP2:%.*]] = icmp ult double* [[A]], [[B]] ; IS__CGSCC____-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]] ; IS__CGSCC____: if.then3: ; IS__CGSCC____-NEXT: [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone [[B]]) -; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) -; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) -; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) -; IS__CGSCC____-NEXT: [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) -; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL5:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL6:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) +; IS__CGSCC____-NEXT: [[CALL7:%.*]] = call noundef double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree noundef readnone [[CALL6]], double* noalias nocapture nofree readnone undef) +; IS__CGSCC____-NEXT: [[CALL8:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) +; IS__CGSCC____-NEXT: [[CALL9:%.*]] = call noundef double* @ptr_scc_r2(double* noalias nofree noundef readnone [[CALL5]], double* noalias nofree noundef readnone [[CALL7]], double* noalias nofree noundef readnone [[CALL8]]) +; IS__CGSCC____-NEXT: [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree noundef readnone [[CALL4]], double* noalias nofree noundef readnone [[CALL9]], double* noalias nocapture nofree noundef readnone undef) ; IS__CGSCC____-NEXT: br label [[RETURN]] ; IS__CGSCC____: if.end12: ; IS__CGSCC____-NEXT: [[CMP13:%.*]] = icmp eq double* [[A]], [[B]] @@ -605,7 +605,7 @@ define i32* @calls_unknown_fn(i32* %r) #0 { ; CHECK: Function Attrs: noinline nounwind uwtable ; CHECK-LABEL: define {{[^@]+}}@calls_unknown_fn ; CHECK-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) -; 
CHECK-NEXT: tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) +; CHECK-NEXT: tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) ; CHECK-NEXT: ret i32* [[R]] ; tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn) diff --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll index 22c2979e23defe..b4a02671b7cdc3 100644 --- a/llvm/test/Transforms/Attributor/undefined_behavior.ll +++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll @@ -704,12 +704,12 @@ ret: define void @arg_nonnull_violation1_1() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() -; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation1_1() -; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 dereferenceable(4) null) ; IS__CGSCC____-NEXT: ret void ; call void @arg_nonnull_1(i32* null) @@ -734,13 +734,13 @@ define void @arg_nonnull_violation2_1(i1 %c) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree nonnull writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree noundef nonnull writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@arg_nonnull_violation2_1 ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree nonnull writeonly align 536870912 dereferenceable(4) null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_1(i32* nocapture nofree noundef nonnull writeonly align 536870912 dereferenceable(4) null) ; IS__CGSCC____-NEXT: ret void ; %null = getelementptr i32, i32* null, i32 0 @@ -774,16 +774,16 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly 
align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: br label [[RET:%.*]] ; IS__TUNIT____: f: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly 
align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: br label [[RET]] ; IS__TUNIT____: ret: ; IS__TUNIT____-NEXT: ret void @@ -794,16 +794,16 @@ define void @arg_nonnull_violation3_1(i1 %c) { ; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: br label [[RET:%.*]] ; IS__CGSCC____: f: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* 
noalias nocapture nofree nonnull writeonly align 536870912 null, i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: br label [[RET]] ; IS__CGSCC____: ret: ; IS__CGSCC____-NEXT: ret void @@ -833,12 +833,12 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; IS__TUNIT____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__TUNIT____: t: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: unreachable ; IS__TUNIT____: f: -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly 
align 4 dereferenceable(4) [[PTR]]) +; IS__TUNIT____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: unreachable ; IS__TUNIT____: ret: ; IS__TUNIT____-NEXT: ret void @@ -849,12 +849,12 @@ define void @arg_nonnull_violation3_2(i1 %c) { ; IS__CGSCC____-NEXT: [[PTR:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; IS__CGSCC____: t: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: unreachable ; IS__CGSCC____: f: -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]]) -; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree nonnull writeonly align 536870912 null, i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]]) +; IS__CGSCC____-NEXT: call void @arg_nonnull_12_noundef_2(i32* noalias nocapture nofree noundef nonnull writeonly align 536870912 null, i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[PTR]], i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: unreachable ; IS__CGSCC____: ret: ; IS__CGSCC____-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll index 3a487a9ed22995..7ae8cd37801171 100644 --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -321,12 +321,12 @@ define i32 @ipccp3() { define internal i32* @test_inalloca(i32* inalloca %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_inalloca -; 
IS__TUNIT____-SAME: (i32* inalloca noalias nofree returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_inalloca -; IS__CGSCC____-SAME: (i32* inalloca noalias nofree returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a @@ -334,12 +334,12 @@ define internal i32* @test_inalloca(i32* inalloca %a) { define i32* @complicated_args_inalloca() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_inalloca() -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__TUNIT____-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_inalloca() -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree writeonly align 536870912 null) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) ; IS__CGSCC____-NEXT: ret i32* [[CALL]] ; %call = call i32* @test_inalloca(i32* null) @@ -349,12 +349,12 @@ define i32* @complicated_args_inalloca() { define internal i32* @test_preallocated(i32* preallocated(i32) %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_preallocated -; IS__TUNIT____-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret i32* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_preallocated -; IS__CGSCC____-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret i32* [[A]] ; ret i32* %a @@ -363,25 +363,25 @@ define i32* @complicated_args_preallocated() { ; IS__TUNIT_OPM: Function Attrs: nounwind ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__TUNIT_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] +; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] ; 
IS__TUNIT_OPM-NEXT: ret i32* [[CALL]] ; ; IS__TUNIT_NPM: Function Attrs: nounwind ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__TUNIT_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] +; IS__TUNIT_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR4:#.*]] [ "preallocated"(token [[C]]) ] ; IS__TUNIT_NPM-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC_OPM: Function Attrs: nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__CGSCC_OPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR6:#.*]] [ "preallocated"(token [[C]]) ] +; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR6:#.*]] [ "preallocated"(token [[C]]) ] ; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]] ; ; IS__CGSCC_NPM: Function Attrs: nounwind ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated() ; IS__CGSCC_NPM-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1) -; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] +; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) [[ATTR5:#.*]] [ "preallocated"(token [[C]]) ] ; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]] ; %c = call token @llvm.call.preallocated.setup(i32 1) @@ -393,13 +393,13 @@ define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) { ; ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_sret -; IS__TUNIT____-SAME: (%struct.X* noalias nofree nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) +; IS__TUNIT____-SAME: (%struct.X* noalias nofree noundef nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) ; IS__TUNIT____-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_sret -; IS__CGSCC____-SAME: (%struct.X* noalias nofree nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) +; IS__CGSCC____-SAME: (%struct.X* noalias nofree noundef nonnull sret writeonly align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) ; IS__CGSCC____-NEXT: store %struct.X* [[A]], %struct.X** [[B]], align 8 ; IS__CGSCC____-NEXT: ret void ; @@ -412,14 +412,13 @@ define void @complicated_args_sret(%struct.X** %b) { ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind 
willreturn writeonly ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__TUNIT____-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) -; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) +; IS__TUNIT____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_sret ; IS__CGSCC____-SAME: (%struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) -; IS__CGSCC____-NEXT: call void @test_sret(%struct.X* noalias nocapture nofree nonnull writeonly align 536870912 dereferenceable(8) null, %struct.X** nocapture nofree nonnull writeonly align 8 dereferenceable(8) [[B]]) -; IS__CGSCC____-NEXT: ret void +; IS__CGSCC____-NEXT: unreachable ; call void @test_sret(%struct.X* null, %struct.X** %b) ret void @@ -428,12 +427,12 @@ define void @complicated_args_sret(%struct.X** %b) { define internal %struct.X* @test_nest(%struct.X* nest %a) { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_nest -; IS__TUNIT____-SAME: (%struct.X* nest noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__TUNIT____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__TUNIT____-NEXT: ret %struct.X* [[A]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_nest -; IS__CGSCC____-SAME: (%struct.X* nest noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) +; IS__CGSCC____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) ; IS__CGSCC____-NEXT: ret %struct.X* [[A]] ; ret %struct.X* %a @@ -441,12 +440,12 @@ define internal %struct.X* @test_nest(%struct.X* nest %a) { define %struct.X* @complicated_args_nest() { ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_nest() -; IS__TUNIT____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree readnone align 536870912 null) +; IS__TUNIT____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) ; IS__TUNIT____-NEXT: ret %struct.X* [[CALL]] ; ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_nest() -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree readnone align 536870912 null) +; IS__CGSCC____-NEXT: [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) ; IS__CGSCC____-NEXT: ret %struct.X* [[CALL]] ; %call = call %struct.X* @test_nest(%struct.X* null) @@ -457,7 +456,7 @@ define %struct.X* @complicated_args_nest() { define internal void @test_byval(%struct.X* byval %a) { ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test_byval -; 
IS__CGSCC_OPM-SAME: (%struct.X* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[A:%.*]]) +; IS__CGSCC_OPM-SAME: (%struct.X* noalias nocapture nofree noundef nonnull writeonly byval align 8 dereferenceable(8) [[A:%.*]]) ; IS__CGSCC_OPM-NEXT: [[G0:%.*]] = getelementptr [[STRUCT_X:%.*]], %struct.X* [[A]], i32 0, i32 0 ; IS__CGSCC_OPM-NEXT: store i8* null, i8** [[G0]], align 8 ; IS__CGSCC_OPM-NEXT: ret void diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 07976660546f8f..b9e739a62b5b97 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -27,7 +27,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 define void @delete_parallel_0() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_0() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined.willreturn to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -99,9 +99,9 @@ entry: define void @delete_parallel_1() { ; CHECK-LABEL: define {{[^@]+}}@delete_parallel_1() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..0 to void (i32*, i32*, ...)*)) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 0, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) ; CHECK-NEXT: ret void ; entry: @@ -190,10 +190,10 @@ define void @delete_parallel_2() { ; CHECK-NEXT: [[TMP:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull align 4 dereferenceable(4) [[TMP]]) #0 ; CHECK-NEXT: store i32 0, i32* [[A]], align 4 -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) -; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) +; CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @0, i32 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A]]) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) ; CHECK-NEXT: ret void @@ -214,7 +214,7 @@ entry: define internal void @.omp_outlined..3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..3 -; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree nonnull align 4 dereferenceable(4) [[A:%.*]]) #6 +; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #6 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #4 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0 @@ -244,7 +244,7 @@ if.end: ; preds = %if.then, %entry define internal void @.omp_outlined..4(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* nonnull @0, i32 [[TMP]]) @@ -286,7 +286,7 @@ declare void @__kmpc_end_master(%struct.ident_t*, i32) define internal void @.omp_outlined..5(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..5 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) ; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 @@ -324,7 +324,7 @@ omp_if.end: ; preds = %entry, %omp_if.then define internal void @.omp_outlined..6(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %a) { ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..6 -; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull align 4 dereferenceable(4) [[A:%.*]]) +; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) 
[[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 From b246bea921ae09c6f6a1d8c4fee7229a24990027 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 18 Aug 2020 09:10:43 +0000 Subject: [PATCH 019/101] [gn build] Port 00d7b7d014f --- llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn index c807389846b78f..6f3b5d43e673d5 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/BUILD.gn @@ -43,6 +43,7 @@ unittest("ToolingTests") { "RecursiveASTVisitorTests/CXXOperatorCallExprTraverser.cpp", "RecursiveASTVisitorTests/Callbacks.cpp", "RecursiveASTVisitorTests/Class.cpp", + "RecursiveASTVisitorTests/Concept.cpp", "RecursiveASTVisitorTests/ConstructExpr.cpp", "RecursiveASTVisitorTests/DeclRefExpr.cpp", "RecursiveASTVisitorTests/ImplicitCtor.cpp", From 13080ca1f0823b8df9651c1977040e5471c4a431 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Tue, 18 Aug 2020 11:32:51 +0200 Subject: [PATCH 020/101] [compiler-rt][test] XFAIL two tests on 32-bit sparc Two tests `FAIL` on 32-bit sparc: Profile-sparc :: Posix/instrprof-gcov-parallel.test UBSan-Standalone-sparc :: TestCases/Float/cast-overflow.cpp The failure mode is similar: Undefined first referenced symbol in file __atomic_store_4 /var/tmp/instrprof-gcov-parallel-6afe8d.o __atomic_load_4 /var/tmp/instrprof-gcov-parallel-6afe8d.o Undefined first referenced symbol in file __atomic_load_1 /var/tmp/cast-overflow-72a808.o This is a known bug: `clang` doesn't inline atomics on 32-bit sparc, unlike `gcc`. The patch therefore `XFAIL`s the tests. Tested on `sparcv9-sun-solaris2.11` and `amd64-pc-solaris2.11`. Differential Revision: https://reviews.llvm.org/D85346 --- compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test | 3 +++ compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test index 0c7198e3c4e9eb..52b51e6269f532 100644 --- a/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test +++ b/compiler-rt/test/profile/Posix/instrprof-gcov-parallel.test @@ -10,6 +10,9 @@ RUN: %run %t.driver %t.target RUN: llvm-cov gcov instrprof-gcov-parallel.target.gcda RUN: FileCheck --input-file instrprof-gcov-parallel.target.c.gcov %s +# Bug 42535 +# XFAIL: sparc-target-arch + # Test if the .gcda file is correctly created from one of child processes # and counters of all processes are recorded correctly. # 707 = CHILDREN * COUNT diff --git a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp index 479c39f28428ad..1c680259a2471e 100644 --- a/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp +++ b/compiler-rt/test/ubsan/TestCases/Float/cast-overflow.cpp @@ -11,6 +11,9 @@ // FIXME: not %run %t 8 2>&1 | FileCheck %s --check-prefix=CHECK-8 // RUN: not %run %t 9 2>&1 | FileCheck %s --check-prefix=CHECK-9 +// Bug 42535 +// XFAIL: sparc-target-arch + // This test assumes float and double are IEEE-754 single- and double-precision. 
#if defined(__APPLE__) From 9b32ef9413be2f18ad98f24454854b438b5d9214 Mon Sep 17 00:00:00 2001 From: QingShan Zhang Date: Tue, 18 Aug 2020 09:40:37 +0000 Subject: [PATCH 021/101] [Test][NFC] Add a new test to verify if scheduler can cluster two ld/st even with different preds --- .../CodeGen/AArch64/aarch64-stp-cluster.ll | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll index e821e8504d962c..b0ed3d0490cc04 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll @@ -213,3 +213,28 @@ entry: store i32 %add, i32* %arrayidx1, align 4 ret void } + +; FIXME - The SU(4) and SU(7) can be clustered even with +; different preds +; CHECK: ********** MI Scheduling ********** +; CHECK-LABEL: cluster_with_different_preds:%bb.0 +; CHECK-NOT:Cluster ld/st SU(4) - SU(7) +; CHECK:SU(3): STRWui %2:gpr32, %0:gpr64common, 0 :: +; CHECK:SU(4): %3:gpr32 = LDRWui %1:gpr64common, 0 :: +; CHECK:Predecessors: +; CHECK: SU(3): Ord Latency=1 Memory +; CHECK:SU(6): STRBBui %4:gpr32, %1:gpr64common, 4 :: +; CHECK:SU(7): %5:gpr32 = LDRWui %1:gpr64common, 1 :: +; CHECK:Predecessors: +; CHECK:SU(6): Ord Latency=1 Memory +define i32 @cluster_with_different_preds(i32* %p, i32* %q) { +entry: + store i32 3, i32* %p, align 4 + %0 = load i32, i32* %q, align 4 + %add.ptr = getelementptr inbounds i32, i32* %q, i64 1 + %1 = bitcast i32* %add.ptr to i8* + store i8 5, i8* %1, align 1 + %2 = load i32, i32* %add.ptr, align 4 + %add = add nsw i32 %2, %0 + ret i32 %add +} From eaff200429a3dcf36eebfae39d2e859d6815285e Mon Sep 17 00:00:00 2001 From: sameeran joshi Date: Tue, 18 Aug 2020 15:05:51 +0530 Subject: [PATCH 022/101] [Flang] Move markdown files(.MD) from documentation/ to docs/ Summary: Other LLVM sub-projects use docs/ folder for documentation files. Follow LLVM project policy. Modify `documentation/` references in sources to `docs/`. This patch doesn't modify files to reStructuredText(.rst) file format. 
Reviewed By: DavidTruby, sscalpone Differential Revision: https://reviews.llvm.org/D85884 --- flang/README.md | 20 +++++++++---------- .../ArrayComposition.md | 2 +- .../BijectiveInternalNameUniquing.md | 0 flang/{documentation => docs}/C++17.md | 2 +- flang/{documentation => docs}/C++style.md | 2 +- flang/{documentation => docs}/Calls.md | 2 +- flang/{documentation => docs}/Character.md | 2 +- .../ControlFlowGraph.md | 2 +- flang/{documentation => docs}/Directives.md | 2 +- flang/{documentation => docs}/Extensions.md | 2 +- .../FortranForCProgrammers.md | 2 +- flang/{documentation => docs}/FortranIR.md | 2 +- .../IORuntimeInternals.md | 2 +- .../ImplementingASemanticCheck.md | 2 +- flang/{documentation => docs}/Intrinsics.md | 2 +- .../LabelResolution.md | 2 +- flang/{documentation => docs}/ModFiles.md | 2 +- .../OpenMP-4.5-grammar.txt | 2 +- .../OpenMP-semantics.md | 2 +- .../OptionComparison.md | 2 +- flang/{documentation => docs}/Overview.md | 2 +- .../ParserCombinators.md | 2 +- flang/{documentation => docs}/Parsing.md | 2 +- .../{documentation => docs}/Preprocessing.md | 2 +- .../PullRequestChecklist.md | 2 +- .../RuntimeDescriptor.md | 2 +- flang/{documentation => docs}/Semantics.md | 2 +- .../{documentation => docs}/f2018-grammar.txt | 2 +- .../{documentation => docs}/flang-c-style.el | 2 +- flang/lib/Evaluate/intrinsics.cpp | 2 +- 30 files changed, 38 insertions(+), 38 deletions(-) rename flang/{documentation => docs}/ArrayComposition.md (99%) rename flang/{documentation => docs}/BijectiveInternalNameUniquing.md (100%) rename flang/{documentation => docs}/C++17.md (99%) rename flang/{documentation => docs}/C++style.md (99%) rename flang/{documentation => docs}/Calls.md (99%) rename flang/{documentation => docs}/Character.md (99%) rename flang/{documentation => docs}/ControlFlowGraph.md (99%) rename flang/{documentation => docs}/Directives.md (92%) rename flang/{documentation => docs}/Extensions.md (99%) rename flang/{documentation => docs}/FortranForCProgrammers.md (99%) rename flang/{documentation => docs}/FortranIR.md (99%) rename flang/{documentation => docs}/IORuntimeInternals.md (99%) rename flang/{documentation => docs}/ImplementingASemanticCheck.md (99%) rename flang/{documentation => docs}/Intrinsics.md (99%) rename flang/{documentation => docs}/LabelResolution.md (99%) rename flang/{documentation => docs}/ModFiles.md (99%) rename flang/{documentation => docs}/OpenMP-4.5-grammar.txt (99%) rename flang/{documentation => docs}/OpenMP-semantics.md (99%) rename flang/{documentation => docs}/OptionComparison.md (99%) rename flang/{documentation => docs}/Overview.md (98%) rename flang/{documentation => docs}/ParserCombinators.md (99%) rename flang/{documentation => docs}/Parsing.md (99%) rename flang/{documentation => docs}/Preprocessing.md (99%) rename flang/{documentation => docs}/PullRequestChecklist.md (98%) rename flang/{documentation => docs}/RuntimeDescriptor.md (99%) rename flang/{documentation => docs}/Semantics.md (99%) rename flang/{documentation => docs}/f2018-grammar.txt (99%) rename flang/{documentation => docs}/flang-c-style.el (92%) diff --git a/flang/README.md b/flang/README.md index f7797ed55bd3ed..44573ae4b9b6b0 100644 --- a/flang/README.md +++ b/flang/README.md @@ -8,30 +8,30 @@ F18 was subsequently accepted into the LLVM project and rechristened as Flang. ## Getting Started -Read more about flang in the [documentation directory](documentation). -Start with the [compiler overview](documentation/Overview.md). 
+Read more about flang in the [docs directory](docs). +Start with the [compiler overview](docs/Overview.md). To better understand Fortran as a language and the specific grammar accepted by flang, -read [Fortran For C Programmers](documentation/FortranForCProgrammers.md) +read [Fortran For C Programmers](docs/FortranForCProgrammers.md) and -flang's specifications of the [Fortran grammar](documentation/f2018-grammar.txt) +flang's specifications of the [Fortran grammar](docs/f2018-grammar.txt) and -the [OpenMP grammar](documentation/OpenMP-4.5-grammar.txt). +the [OpenMP grammar](docs/OpenMP-4.5-grammar.txt). Treatment of language extensions is covered -in [this document](documentation/Extensions.md). +in [this document](docs/Extensions.md). To understand the compilers handling of intrinsics, -see the [discussion of intrinsics](documentation/Intrinsics.md). +see the [discussion of intrinsics](docs/Intrinsics.md). To understand how a flang program communicates with libraries at runtime, -see the discussion of [runtime descriptors](documentation/RuntimeDescriptor.md). +see the discussion of [runtime descriptors](docs/RuntimeDescriptor.md). If you're interested in contributing to the compiler, -read the [style guide](documentation/C++style.md) +read the [style guide](docs/C++style.md) and -also review [how flang uses modern C++ features](documentation/C++17.md). +also review [how flang uses modern C++ features](docs/C++17.md). ## Supported C++ compilers diff --git a/flang/documentation/ArrayComposition.md b/flang/docs/ArrayComposition.md similarity index 99% rename from flang/documentation/ArrayComposition.md rename to flang/docs/ArrayComposition.md index 099909c5ef0d04..0f30af39f9e4bb 100644 --- a/flang/documentation/ArrayComposition.md +++ b/flang/docs/ArrayComposition.md @@ -1,4 +1,4 @@ -* -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | `-int -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | |-* - | | `-xp - | |-, - | |-SimpleDeclaration - | | |-int - | | `-SimpleDeclarator - | | |-MemberPointer - | | | |-X - | | | |-:: - | | | `-* - | | `-pmi - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-BinaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-xp - | | |-->* - | | `-IdExpression - | | `-UnqualifiedId - | | `-pmi - | `-; - `-} -)txt")); + {R"txt( +BinaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-xp +|-->* +`-IdExpression + `-UnqualifiedId + `-pmi +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_Negation) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { bool operator!(); }; void test(X x) { - !x; + [[!x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-bool -| | |-SimpleDeclarator -| | | |-operator -| | | |-! -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-! - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-! 
+`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_AddressOf) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X* operator&(); }; void test(X x) { - &x; + [[&x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-* -| | | |-operator -| | | |-& -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-& - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-& +`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_PrefixIncrement) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X operator++(); }; void test(X x) { - ++x; + [[++x]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-++ -| | | `-ParametersAndQualifiers -| | | |-( -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PrefixUnaryOperatorExpression - | | |-++ - | | `-IdExpression - | | `-UnqualifiedId - | | `-x - | `-; - `-} -)txt")); + {R"txt( +PrefixUnaryOperatorExpression +|-++ +`-IdExpression + `-UnqualifiedId + `-x +)txt"})); } TEST_P(SyntaxTreeTest, OverloadedOperator_PostfixIncrement) { if (!GetParam().isCXX()) { return; } - EXPECT_TRUE(treeDumpEqual( + EXPECT_TRUE(treeDumpEqualOnAnnotations( R"cpp( struct X { X operator++(int); }; void test(X x) { - x++; + [[x++]]; } )cpp", - R"txt( -*: TranslationUnit -|-SimpleDeclaration -| |-struct -| |-X -| |-{ -| |-SimpleDeclaration -| | |-X -| | |-SimpleDeclarator -| | | |-operator -| | | |-++ -| | | `-ParametersAndQualifiers -| | | |-( -| | | |-SimpleDeclaration -| | | | `-int -| | | `-) -| | `-; -| |-} -| `-; -`-SimpleDeclaration - |-void - |-SimpleDeclarator - | |-test - | `-ParametersAndQualifiers - | |-( - | |-SimpleDeclaration - | | |-X - | | `-SimpleDeclarator - | | `-x - | `-) - `-CompoundStatement - |-{ - |-ExpressionStatement - | |-PostfixUnaryOperatorExpression - | | |-IdExpression - | | | `-UnqualifiedId - | | | `-x - | | `-++ - | `-; - `-} -)txt")); + {R"txt( +PostfixUnaryOperatorExpression +|-IdExpression +| `-UnqualifiedId +| `-x +`-++ +)txt"})); } TEST_P(SyntaxTreeTest, MultipleDeclaratorsGrouping) { @@ -3343,6 +2054,33 @@ void foo() { )txt")); } +TEST_P(SyntaxTreeTest, SizeTTypedef) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +typedef decltype(sizeof(void *)) size_t; + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-typedef + |-decltype + |-( + |-UnknownExpression + | |-sizeof + | |-( + | |-void + | |-* + | `-) + |-) + |-SimpleDeclarator + | `-size_t + `-; +)txt")); +} + TEST_P(SyntaxTreeTest, Namespaces) { if (!GetParam().isCXX()) { return; @@ -3496,68 +2234,318 @@ struct {} *a1; )txt")); } 
-TEST_P(SyntaxTreeTest, Templates) { +TEST_P(SyntaxTreeTest, StaticMemberFunction) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct S { + static void f(){} +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-S + |-{ + |-SimpleDeclaration + | |-static + | |-void + | |-SimpleDeclarator + | | |-f + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-CompoundStatement + | |-{ + | `-} + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, ConversionMemberFunction) { if (!GetParam().isCXX()) { return; } - if (GetParam().hasDelayedTemplateParsing()) { - // FIXME: Make this test work on Windows by generating the expected syntax - // tree when `-fdelayed-template-parsing` is active. + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct X { + operator int(); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-SimpleDeclaration + | |-SimpleDeclarator + | | |-operator + | | |-int + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, LiteralOperatorDeclaration) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +unsigned operator "" _c(char); + )cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-unsigned + |-SimpleDeclarator + | |-operator + | |-"" + | |-_c + | `-ParametersAndQualifiers + | |-( + | |-SimpleDeclaration + | | `-char + | `-) + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, NumericLiteralOperatorTemplateDeclaration) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +unsigned operator "" _t(); + )cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-SimpleDeclaration + | `-char + |-... 
+ |-> + `-SimpleDeclaration + |-unsigned + |-SimpleDeclarator + | |-operator + | |-"" + | |-_t + | `-ParametersAndQualifiers + | |-( + | `-) + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, OverloadedOperatorDeclaration) { + if (!GetParam().isCXX()) { return; } EXPECT_TRUE(treeDumpEqual( R"cpp( -template struct cls {}; -template int var = 10; -template int fun() {} +struct X { + X& operator=(const X&); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-SimpleDeclaration + | |-X + | |-SimpleDeclarator + | | |-& + | | |-operator + | | |-= + | | `-ParametersAndQualifiers + | | |-( + | | |-SimpleDeclaration + | | | |-const + | | | |-X + | | | `-SimpleDeclarator + | | | `-& + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, OverloadedOperatorFriendDeclarataion) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct X { + friend X operator+(X, const X&); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-X + |-{ + |-UnknownDeclaration + | `-SimpleDeclaration + | |-friend + | |-X + | |-SimpleDeclarator + | | |-operator + | | |-+ + | | `-ParametersAndQualifiers + | | |-( + | | |-SimpleDeclaration + | | | `-X + | | |-, + | | |-SimpleDeclaration + | | | |-const + | | | |-X + | | | `-SimpleDeclarator + | | | `-& + | | `-) + | `-; + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, ClassTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +struct ST {}; )cpp", R"txt( *: TranslationUnit -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-class -| | `-T -| |-> -| `-SimpleDeclaration -| |-struct -| |-cls -| |-{ -| |-} -| `-; -|-TemplateDeclaration -| |-template -| |-< -| |-UnknownDeclaration -| | |-class -| | `-T -| |-> -| `-SimpleDeclaration -| |-int -| |-SimpleDeclarator -| | |-var -| | |-= -| | `-IntegerLiteralExpression -| | `-10 -| `-; `-TemplateDeclaration |-template |-< |-UnknownDeclaration - | |-class + | |-typename + | `-T + |-> + `-SimpleDeclaration + |-struct + |-ST + |-{ + |-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, FunctionTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template +T f(); +)cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-UnknownDeclaration + | |-typename | `-T |-> `-SimpleDeclaration - |-int + |-T |-SimpleDeclarator - | |-fun + | |-f | `-ParametersAndQualifiers | |-( | `-) - `-CompoundStatement - |-{ - `-} + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, VariableTemplateDeclaration) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +template T var = 10; +)cpp", + R"txt( +*: TranslationUnit +`-TemplateDeclaration + |-template + |-< + |-UnknownDeclaration + | |-class + | `-T + |-> + `-SimpleDeclaration + |-T + |-SimpleDeclarator + | |-var + | |-= + | `-IntegerLiteralExpression + | `-10 + `-; +)txt")); +} + +TEST_P(SyntaxTreeTest, StaticMemberFunctionTemplate) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +struct S { + template + static U f(); +}; +)cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-struct + |-S + |-{ + |-TemplateDeclaration + | |-template + | |-< + | |-UnknownDeclaration + | | |-typename + | | `-U + | |-> + | `-SimpleDeclaration + | |-static + | |-U + | |-SimpleDeclarator + | | |-f + | | `-ParametersAndQualifiers + | | |-( + | | `-) + | `-; + |-} + `-; )txt")); } @@ -3606,6 +2594,59 @@ struct X { 
)txt")); } +TEST_P(SyntaxTreeTest, NestedTemplatesInNamespace) { + if (!GetParam().isCXX()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +namespace n { + template + struct ST { + template + static U f(); + }; +} +)cpp", + R"txt( +*: TranslationUnit +`-NamespaceDefinition + |-namespace + |-n + |-{ + |-TemplateDeclaration + | |-template + | |-< + | |-UnknownDeclaration + | | |-typename + | | `-T + | |-> + | `-SimpleDeclaration + | |-struct + | |-ST + | |-{ + | |-TemplateDeclaration + | | |-template + | | |-< + | | |-UnknownDeclaration + | | | |-typename + | | | `-U + | | |-> + | | `-SimpleDeclaration + | | |-static + | | |-U + | | |-SimpleDeclarator + | | | |-f + | | | `-ParametersAndQualifiers + | | | |-( + | | | `-) + | | `-; + | |-} + | `-; + `-} +)txt")); +} + TEST_P(SyntaxTreeTest, Templates2) { if (!GetParam().isCXX()) { return; diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp index 05fbac4f47e1c3..c5dbb770c53879 100644 --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -171,7 +171,7 @@ ::testing::AssertionResult SyntaxTreeTest::treeDumpEqual(StringRef Code, << "Source file has syntax errors, they were printed to the test " "log"; } - std::string Actual = std::string(StringRef(Root->dump(*Arena)).trim()); + auto Actual = StringRef(Root->dump(*Arena)).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -194,21 +194,29 @@ SyntaxTreeTest::treeDumpEqualOnAnnotations(StringRef CodeWithAnnotations, "log"; } - bool failed = false; auto AnnotatedRanges = AnnotatedCode.ranges(); - assert(AnnotatedRanges.size() == TreeDumps.size()); - for (auto i = 0ul; i < AnnotatedRanges.size(); i++) { + if (AnnotatedRanges.size() != TreeDumps.size()) { + return ::testing::AssertionFailure() + << "The number of annotated ranges in the source code is different " + "to the number of their corresponding tree dumps."; + } + bool Failed = false; + for (unsigned i = 0; i < AnnotatedRanges.size(); i++) { auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); assert(AnnotatedNode); auto AnnotatedNodeDump = - std::string(StringRef(AnnotatedNode->dump(*Arena)).trim()); + StringRef(AnnotatedNode->dump(*Arena)).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. - EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump); + EXPECT_EQ(TreeDumps[i].trim().str(), AnnotatedNodeDump) + << "Dumps diverged for the code:\n" + << AnnotatedCode.code().slice(AnnotatedRanges[i].Begin, + AnnotatedRanges[i].End); if (AnnotatedNodeDump != TreeDumps[i].trim().str()) - failed = true; + Failed = true; } - return failed ? ::testing::AssertionFailure() : ::testing::AssertionSuccess(); + return Failed ? ::testing::AssertionFailure() : ::testing::AssertionSuccess(); } + syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { ArrayRef Toks = tokens(Root); From 1b93ebccaa094c079db7ad729e2f7fea7bac2f34 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 5 Aug 2020 11:48:35 -0400 Subject: [PATCH 035/101] [OPENMP]Do not capture base pointer by reference if it is used as a base for array-like reduction. If the declaration is used in the reduction clause, it is captured by reference by default. 
But if the declaration is a pointer and it is a base for array-like reduction, this declaration can be captured by value, since the pointee is reduced but not the original declaration. Differential Revision: https://reviews.llvm.org/D85321 --- clang/lib/Sema/SemaOpenMP.cpp | 155 ++++++++++++------ ...te_parallel_for_reduction_task_codegen.cpp | 7 +- .../OpenMP/for_reduction_task_codegen.cpp | 3 +- .../parallel_for_reduction_task_codegen.cpp | 7 +- ...parallel_master_reduction_task_codegen.cpp | 7 +- .../parallel_reduction_task_codegen.cpp | 7 +- ...rallel_sections_reduction_task_codegen.cpp | 7 +- .../sections_reduction_task_codegen.cpp | 3 +- ...et_parallel_for_reduction_task_codegen.cpp | 7 +- ...target_parallel_reduction_task_codegen.cpp | 7 +- ...te_parallel_for_reduction_task_codegen.cpp | 7 +- ...te_parallel_for_reduction_task_codegen.cpp | 7 +- 12 files changed, 131 insertions(+), 93 deletions(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a493f3114dc299..53917ef98acdff 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -70,12 +70,15 @@ class DSAStackTy { const Expr *RefExpr = nullptr; DeclRefExpr *PrivateCopy = nullptr; SourceLocation ImplicitDSALoc; + bool AppliedToPointee = false; DSAVarData() = default; DSAVarData(OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, const Expr *RefExpr, DeclRefExpr *PrivateCopy, - SourceLocation ImplicitDSALoc, unsigned Modifier) + SourceLocation ImplicitDSALoc, unsigned Modifier, + bool AppliedToPointee) : DKind(DKind), CKind(CKind), Modifier(Modifier), RefExpr(RefExpr), - PrivateCopy(PrivateCopy), ImplicitDSALoc(ImplicitDSALoc) {} + PrivateCopy(PrivateCopy), ImplicitDSALoc(ImplicitDSALoc), + AppliedToPointee(AppliedToPointee) {} }; using OperatorOffsetTy = llvm::SmallVector, 4>; @@ -99,6 +102,9 @@ class DSAStackTy { /// variable is marked as lastprivate(true) or not (false). llvm::PointerIntPair RefExpr; DeclRefExpr *PrivateCopy = nullptr; + /// true if the attribute is applied to the pointee, not the variable + /// itself. + bool AppliedToPointee = false; }; using DeclSAMapTy = llvm::SmallDenseMap; using UsedRefMapTy = llvm::SmallDenseMap; @@ -511,7 +517,8 @@ class DSAStackTy { /// Adds explicit data sharing attribute to the specified declaration. void addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, - DeclRefExpr *PrivateCopy = nullptr, unsigned Modifier = 0); + DeclRefExpr *PrivateCopy = nullptr, unsigned Modifier = 0, + bool AppliedToPointee = false); /// Adds additional information for the reduction items with the reduction id /// represented as an operator. @@ -563,7 +570,8 @@ class DSAStackTy { /// match specified \a CPred predicate in any directive which matches \a DPred /// predicate. const DSAVarData - hasDSA(ValueDecl *D, const llvm::function_ref CPred, + hasDSA(ValueDecl *D, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const; /// Checks if the specified variables has data-sharing attributes which @@ -571,15 +579,16 @@ class DSAStackTy { /// matches \a DPred predicate. const DSAVarData hasInnermostDSA(ValueDecl *D, - const llvm::function_ref CPred, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const; /// Checks if the specified variables has explicit data-sharing /// attributes which match specified \a CPred predicate at the specified /// OpenMP region. 
- bool hasExplicitDSA(const ValueDecl *D, - const llvm::function_ref CPred, - unsigned Level, bool NotLastprivate = false) const; + bool + hasExplicitDSA(const ValueDecl *D, + const llvm::function_ref CPred, + unsigned Level, bool NotLastprivate = false) const; /// Returns true if the directive at level \Level matches in the /// specified \a DPred predicate. @@ -1185,6 +1194,7 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter, DVar.CKind = Data.Attributes; DVar.ImplicitDSALoc = Iter->DefaultAttrLoc; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; return DVar; } @@ -1341,7 +1351,8 @@ const ValueDecl *DSAStackTy::getParentLoopControlVariable(unsigned I) const { } void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, - DeclRefExpr *PrivateCopy, unsigned Modifier) { + DeclRefExpr *PrivateCopy, unsigned Modifier, + bool AppliedToPointee) { D = getCanonicalDecl(D); if (A == OMPC_threadprivate) { DSAInfo &Data = Threadprivates[D]; @@ -1365,12 +1376,14 @@ void DSAStackTy::addDSA(const ValueDecl *D, const Expr *E, OpenMPClauseKind A, Data.Attributes = A; Data.RefExpr.setPointerAndInt(E, IsLastprivate); Data.PrivateCopy = PrivateCopy; + Data.AppliedToPointee = AppliedToPointee; if (PrivateCopy) { DSAInfo &Data = getTopOfStack().SharingMap[PrivateCopy->getDecl()]; Data.Modifier = Modifier; Data.Attributes = A; Data.RefExpr.setPointerAndInt(PrivateCopy, IsLastprivate); Data.PrivateCopy = nullptr; + Data.AppliedToPointee = AppliedToPointee; } } } @@ -1480,7 +1493,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task, + /*AppliedToPointee=*/false); } return DSAVarData(); } @@ -1506,7 +1520,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData( "set."); TaskgroupDescriptor = I->TaskgroupReductionRef; return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(), - Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task); + Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task, + /*AppliedToPointee=*/false); } return DSAVarData(); } @@ -1675,6 +1690,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, DVar.ImplicitDSALoc = I->DefaultAttrLoc; DVar.DKind = I->Directive; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; return DVar; } } @@ -1696,7 +1712,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, // listed in a firstprivate clause, even if they are static data members. 
DSAVarData DVarTemp = hasInnermostDSA( D, - [](OpenMPClauseKind C) { + [](OpenMPClauseKind C, bool) { return C == OMPC_firstprivate || C == OMPC_shared; }, MatchesAlways, FromParent); @@ -1725,6 +1741,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, DVar.ImplicitDSALoc = I->DefaultAttrLoc; DVar.DKind = I->Directive; DVar.Modifier = Data.Modifier; + DVar.AppliedToPointee = Data.AppliedToPointee; } return DVar; @@ -1755,7 +1772,7 @@ const DSAStackTy::DSAVarData DSAStackTy::getImplicitDSA(ValueDecl *D, const DSAStackTy::DSAVarData DSAStackTy::hasDSA(ValueDecl *D, - const llvm::function_ref CPred, + const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const { if (isStackEmpty()) @@ -1771,14 +1788,14 @@ DSAStackTy::hasDSA(ValueDecl *D, continue; const_iterator NewI = I; DSAVarData DVar = getDSA(NewI, D); - if (I == NewI && CPred(DVar.CKind)) + if (I == NewI && CPred(DVar.CKind, DVar.AppliedToPointee)) return DVar; } return {}; } const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA( - ValueDecl *D, const llvm::function_ref CPred, + ValueDecl *D, const llvm::function_ref CPred, const llvm::function_ref DPred, bool FromParent) const { if (isStackEmpty()) @@ -1792,26 +1809,28 @@ const DSAStackTy::DSAVarData DSAStackTy::hasInnermostDSA( return {}; const_iterator NewI = StartI; DSAVarData DVar = getDSA(NewI, D); - return (NewI == StartI && CPred(DVar.CKind)) ? DVar : DSAVarData(); + return (NewI == StartI && CPred(DVar.CKind, DVar.AppliedToPointee)) + ? DVar + : DSAVarData(); } bool DSAStackTy::hasExplicitDSA( - const ValueDecl *D, const llvm::function_ref CPred, + const ValueDecl *D, + const llvm::function_ref CPred, unsigned Level, bool NotLastprivate) const { if (getStackSize() <= Level) return false; D = getCanonicalDecl(D); const SharingMapTy &StackElem = getStackElemAtLevel(Level); auto I = StackElem.SharingMap.find(D); - if (I != StackElem.SharingMap.end() && - I->getSecond().RefExpr.getPointer() && - CPred(I->getSecond().Attributes) && + if (I != StackElem.SharingMap.end() && I->getSecond().RefExpr.getPointer() && + CPred(I->getSecond().Attributes, I->getSecond().AppliedToPointee) && (!NotLastprivate || !I->getSecond().RefExpr.getInt())) return true; // Check predetermined rules for the loop control variables. auto LI = StackElem.LCVMap.find(D); if (LI != StackElem.LCVMap.end()) - return CPred(OMPC_private); + return CPred(OMPC_private, /*AppliedToPointee=*/false); return false; } @@ -2057,14 +2076,17 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // By default, all the data that has a scalar type is mapped by copy // (except for reduction variables). 
// Defaultmap scalar is mutual exclusive to defaultmap pointer - IsByRef = - (DSAStack->isForceCaptureByReferenceInTargetExecutable() && - !Ty->isAnyPointerType()) || - !Ty->isScalarType() || - DSAStack->isDefaultmapCapturedByRef( - Level, getVariableCategoryFromDecl(LangOpts, D)) || - DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_reduction; }, Level); + IsByRef = (DSAStack->isForceCaptureByReferenceInTargetExecutable() && + !Ty->isAnyPointerType()) || + !Ty->isScalarType() || + DSAStack->isDefaultmapCapturedByRef( + Level, getVariableCategoryFromDecl(LangOpts, D)) || + DSAStack->hasExplicitDSA( + D, + [](OpenMPClauseKind K, bool AppliedToPointee) { + return K == OMPC_reduction && !AppliedToPointee; + }, + Level); } } @@ -2075,8 +2097,9 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, OMPD_target) || !(DSAStack->hasExplicitDSA( D, - [](OpenMPClauseKind K) -> bool { - return K == OMPC_firstprivate; + [](OpenMPClauseKind K, bool AppliedToPointee) -> bool { + return K == OMPC_firstprivate || + (K == OMPC_reduction && AppliedToPointee); }, Level, /*NotLastprivate=*/true) || DSAStack->isUsesAllocatorsDecl(Level, D))) && @@ -2088,7 +2111,8 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // copy !(DSAStack->getDefaultDSA() == DSA_firstprivate && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K != OMPC_unknown; }, Level) && + D, [](OpenMPClauseKind K, bool) { return K != OMPC_unknown; }, + Level) && !DSAStack->isLoopControlVariable(D, Level).first); } @@ -2151,7 +2175,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) checkDeclIsAllowedInOpenMPTarget(nullptr, VD); return nullptr; - } else if (isInOpenMPTargetExecutionDirective()) { + } + if (isInOpenMPTargetExecutionDirective()) { // If the declaration is enclosed in a 'declare target' directive, // then it should not be captured. // @@ -2204,7 +2229,8 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, return VD ? VD : Info.second; DSAStackTy::DSAVarData DVarTop = DSAStack->getTopDSA(D, DSAStack->isClauseParsingMode()); - if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind)) + if (DVarTop.CKind != OMPC_unknown && isOpenMPPrivate(DVarTop.CKind) && + (!VD || VD->hasLocalStorage() || !DVarTop.AppliedToPointee)) return VD ? VD : cast(DVarTop.PrivateCopy->getDecl()); // Threadprivate variables must not be captured. if (isOpenMPThreadPrivate(DVarTop.CKind)) @@ -2212,7 +2238,11 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo, // The variable is not private or it is the variable in the directive with // default(none) clause and not used in any clause. DSAStackTy::DSAVarData DVarPrivate = DSAStack->hasDSA( - D, isOpenMPPrivate, [](OpenMPDirectiveKind) { return true; }, + D, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return isOpenMPPrivate(C) && !AppliedToPointee; + }, + [](OpenMPDirectiveKind) { return true; }, DSAStack->isClauseParsingMode()); // Global shared must not be captured. 
if (VD && !VD->hasLocalStorage() && DVarPrivate.CKind == OMPC_unknown && @@ -2266,7 +2296,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, (IsTriviallyCopyable || !isOpenMPTaskLoopDirective(CaptureRegions[CapLevel]))) { if (DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_firstprivate; }, + D, + [](OpenMPClauseKind K, bool) { return K == OMPC_firstprivate; }, Level, /*NotLastprivate=*/true)) return OMPC_firstprivate; DSAStackTy::DSAVarData DVar = DSAStack->getImplicitDSA(D, Level); @@ -2287,7 +2318,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, if ((DSAStack->getPossiblyLoopCunter() == D->getCanonicalDecl() || DSAStack->isLoopControlVariable(D).first) && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K != OMPC_private; }, Level) && + D, [](OpenMPClauseKind K, bool) { return K != OMPC_private; }, + Level) && !isOpenMPSimdDirective(DSAStack->getCurrentDirective())) return OMPC_private; } @@ -2295,7 +2327,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, if (DSAStack->isThreadPrivate(const_cast(VD)) && DSAStack->isForceVarCapturing() && !DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_copyin; }, Level)) + D, [](OpenMPClauseKind K, bool) { return K == OMPC_copyin; }, + Level)) return OMPC_private; } // User-defined allocators are private since they must be defined in the @@ -2306,7 +2339,8 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level, DSAStackTy::UsesAllocatorsDeclKind::UserDefinedAllocator) return OMPC_private; return (DSAStack->hasExplicitDSA( - D, [](OpenMPClauseKind K) { return K == OMPC_private; }, Level) || + D, [](OpenMPClauseKind K, bool) { return K == OMPC_private; }, + Level) || (DSAStack->isClauseParsingMode() && DSAStack->getClauseParsingMode() == OMPC_private) || // Consider taskgroup reduction descriptor variable a private @@ -2331,15 +2365,16 @@ void Sema::setOpenMPCaptureKind(FieldDecl *FD, const ValueDecl *D, OpenMPClauseKind OMPC = OMPC_unknown; for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) { const unsigned NewLevel = I - 1; - if (DSAStack->hasExplicitDSA(D, - [&OMPC](const OpenMPClauseKind K) { - if (isOpenMPPrivate(K)) { - OMPC = K; - return true; - } - return false; - }, - NewLevel)) + if (DSAStack->hasExplicitDSA( + D, + [&OMPC](const OpenMPClauseKind K, bool AppliedToPointee) { + if (isOpenMPPrivate(K) && !AppliedToPointee) { + OMPC = K; + return true; + } + return false; + }, + NewLevel)) break; if (DSAStack->checkMappableExprComponentListsForDeclAtLevel( D, NewLevel, @@ -3474,7 +3509,10 @@ class DSAAttrChecker final : public StmtVisitor { // enclosing worksharing or parallel construct may not be accessed in an // explicit task. DVar = Stack->hasInnermostDSA( - VD, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + VD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || isOpenMPTeamsDirective(K); @@ -3559,7 +3597,10 @@ class DSAAttrChecker final : public StmtVisitor { // enclosing worksharing or parallel construct may not be accessed in // an explicit task. 
DVar = Stack->hasInnermostDSA( - FD, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + FD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || isOpenMPTeamsDirective(K); @@ -14044,7 +14085,10 @@ OMPClause *Sema::ActOnOpenMPFirstprivateClause(ArrayRef VarList, // from the worksharing construct. if (isOpenMPTaskingDirective(CurrDir)) { DVar = DSAStack->hasInnermostDSA( - D, [](OpenMPClauseKind C) { return C == OMPC_reduction; }, + D, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return C == OMPC_reduction && !AppliedToPointee; + }, [](OpenMPDirectiveKind K) { return isOpenMPParallelDirective(K) || isOpenMPWorksharingDirective(K) || @@ -14435,7 +14479,11 @@ class DSARefChecker : public StmtVisitor { if (DVar.CKind != OMPC_unknown) return true; DSAStackTy::DSAVarData DVarPrivate = Stack->hasDSA( - VD, isOpenMPPrivate, [](OpenMPDirectiveKind) { return true; }, + VD, + [](OpenMPClauseKind C, bool AppliedToPointee) { + return isOpenMPPrivate(C) && !AppliedToPointee; + }, + [](OpenMPDirectiveKind) { return true; }, /*FromParent=*/true); return DVarPrivate.CKind != OMPC_unknown; } @@ -15513,7 +15561,8 @@ static bool actOnOMPReductionKindClause( // correct analysis of in_reduction clauses. if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction) Modifier = OMPC_REDUCTION_task; - Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier); + Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier, + ASE || OASE); if (Modifier == OMPC_REDUCTION_task && (CurrDir == OMPD_taskgroup || ((isOpenMPParallelDirective(CurrDir) || diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index 971e9be8534ba6..995ded43db3d84 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -20,9 +20,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -124,7 +124,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp index ea8fc55d9cb2f9..0018e109aaed9a 100644 --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -124,7 +124,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp index b4f4f83ec95549..fcee3d645b4ae1 100644 --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp index 0f8366fa95e338..ab76987a59c931 100644 --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp index 5e04aa8c1ec287..c64ffb50800648 100644 --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp index 867eb45a1332b6..5481f0b2daa4b3 100644 --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: alloca i32, // CHECK: alloca i32, @@ -127,7 +127,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp index be67a2a174004f..1c0be118a03ca4 100644 --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -128,7 +128,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp index 5c5ea6b90d5297..66a20141df0394 100644 --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp index 2fc49d44c1e904..e42e372ea67a72 100644 --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t], // CHECK: [[TG:%.+]] = alloca i8*, @@ -122,7 +122,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp index 06c0f8744e8cca..fbd990699d8327 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -19,9 +19,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}}) -// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}}) +// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}}) // CHECK: alloca i32, // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32, // CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]], @@ -123,7 +123,6 @@ int main(int argc, char **argv) { // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], -// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 +// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp index 194999f8cbb05c..26ca2352cc9ac5 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -20,9 +20,9 @@ int main(int argc, char **argv) { } } -// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}}) +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8**)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8** %{{.+}})
 
-// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8** {{.+}})
 // CHECK: alloca i32,
 // CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
 // CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
@@ -124,7 +124,6 @@ int main(int argc, char **argv) {
 // CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
 // CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
 // CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
-// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
-// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+// CHECK-DAG: [[ARGV_ADDR_REF]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
 
 #endif

From bd7daf5ceb92db00d3fc5d1ce8d4f74dcd03ebb9 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Tue, 18 Aug 2020 15:52:09 +0300
Subject: [PATCH 036/101] [yaml2obj] - Don't crash when `FileHeader` declares
 an empty `Flags` key in specific situations.

We currently call `llvm_unreachable` for the following YAML:

```
--- !ELF
FileHeader:
  Class: ELFCLASS32
  Data: ELFDATA2LSB
  Type: ET_REL
  Machine: EM_NONE
  Flags: [ ]
```

It happens because the `Flags` key is present even though `EM_NONE` is a
machine type that has no known `EF_*` values, so we call `llvm_unreachable`
by mistake.

Differential Revision: https://reviews.llvm.org/D86138
---
 llvm/lib/ObjectYAML/ELFYAML.cpp          |  4 +---
 llvm/test/tools/yaml2obj/ELF/eflags.yaml | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/yaml2obj/ELF/eflags.yaml

diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 319e37022c85a9..e5d5e6a01bc6bf 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -434,10 +434,8 @@ void ScalarBitSetTraits::bitset(IO &IO,
     BCase(EF_AMDGPU_XNACK);
     BCase(EF_AMDGPU_SRAM_ECC);
     break;
-  case ELF::EM_X86_64:
-    break;
   default:
-    llvm_unreachable("Unsupported architecture");
+    break;
   }
 #undef BCase
 #undef BCaseMask
diff --git a/llvm/test/tools/yaml2obj/ELF/eflags.yaml b/llvm/test/tools/yaml2obj/ELF/eflags.yaml
new file mode 100644
index 00000000000000..8b90a2b2c94451
--- /dev/null
+++ b/llvm/test/tools/yaml2obj/ELF/eflags.yaml
@@ -0,0 +1,16 @@
+## Check how the 'Flags' key can be used to encode e_flags field values.
+
+## Check we are able to produce no flags for EM_NONE. EM_NONE is an arbitrary
+## e_machine type that has no EF_* values defined for it.
+# RUN: yaml2obj %s -o %t-no-flags +# RUN: llvm-readelf --file-headers %t-no-flags | FileCheck %s --check-prefix=NOFLAGS + +# NOFLAGS: Flags: 0x0{{$}} + +--- !ELF +FileHeader: + Class: ELFCLASS32 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_NONE + Flags: [ ] From e0aa335334813b15d2106ccdcf4930d72aa33772 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 18 Aug 2020 08:24:37 -0400 Subject: [PATCH 037/101] [InstCombine] add tests for fneg+fabs; NFC --- llvm/test/Transforms/InstCombine/fabs.ll | 63 +++++++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll index 0b474045d59675..c0d2c3b701f033 100644 --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -4,6 +4,7 @@ ; Make sure libcalls are replaced with intrinsic calls. declare float @llvm.fabs.f32(float) +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) declare double @llvm.fabs.f64(double) declare fp128 @llvm.fabs.f128(fp128) @@ -13,6 +14,8 @@ declare fp128 @fabsl(fp128) declare float @llvm.fma.f32(float, float, float) declare float @llvm.fmuladd.f32(float, float, float) +declare void @use(float) + define float @replace_fabs_call_f32(float %x) { ; CHECK-LABEL: @replace_fabs_call_f32( ; CHECK-NEXT: [[FABSF:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) @@ -116,8 +119,8 @@ define float @square_fabs_shrink_call1(float %x) { define float @square_fabs_shrink_call2(float %x) { ; CHECK-LABEL: @square_fabs_shrink_call2( ; CHECK-NEXT: [[SQ:%.*]] = fmul float [[X:%.*]], [[X]] -; CHECK-NEXT: [[TRUNC:%.*]] = call float @llvm.fabs.f32(float [[SQ]]) -; CHECK-NEXT: ret float [[TRUNC]] +; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[SQ]]) +; CHECK-NEXT: ret float [[TMP1]] ; %sq = fmul float %x, %x %ext = fpext float %sq to double @@ -745,3 +748,59 @@ define half @select_fcmp_nnan_nsz_uge_negzero_unary_fneg(half %x) { %fabs = select i1 %gezero, half %x, half %negx ret half %fabs } + +define float @select_fneg(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg( +; CHECK-NEXT: [[N:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], float [[N]], float [[X]] +; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[S]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg float %x + %s = select i1 %c, float %n, float %x + %fabs = call float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define float @select_fneg_use1(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg_use1( +; CHECK-NEXT: [[N:%.*]] = fneg float [[X:%.*]] +; CHECK-NEXT: call void @use(float [[N]]) +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], float [[X]], float [[N]] +; CHECK-NEXT: [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[S]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg float %x + call void @use(float %n) + %s = select i1 %c, float %x, float %n + %fabs = call fast float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define float @select_fneg_use2(i1 %c, float %x) { +; CHECK-LABEL: @select_fneg_use2( +; CHECK-NEXT: [[N:%.*]] = fneg arcp float [[X:%.*]] +; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], float [[N]], float [[X]] +; CHECK-NEXT: call void @use(float [[S]]) +; CHECK-NEXT: [[FABS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[S]]) +; CHECK-NEXT: ret float [[FABS]] +; + %n = fneg arcp float %x + %s = select i1 %c, float %n, float %x + call void @use(float %s) + %fabs = call nnan nsz float @llvm.fabs.f32(float %s) + ret float %fabs +} + +define <2 x float> 
@select_fneg_vec(<2 x i1> %c, <2 x float> %x) {
+; CHECK-LABEL: @select_fneg_vec(
+; CHECK-NEXT:    [[N:%.*]] = fneg <2 x float> [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = select fast <2 x i1> [[C:%.*]], <2 x float> [[X]], <2 x float> [[N]]
+; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[S]])
+; CHECK-NEXT:    ret <2 x float> [[FABS]]
+;
+  %n = fneg <2 x float> %x
+  %s = select fast <2 x i1> %c, <2 x float> %x, <2 x float> %n
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %s)
+  ret <2 x float> %fabs
+}

From 139da9c4d74391cd9d12600650ef95d5d68d8b59 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 18 Aug 2020 09:19:03 -0400
Subject: [PATCH 038/101] [InstCombine] fold fabs of select with negated operand

This is the FP example shown in:
https://bugs.llvm.org/PR39474
---
 .../InstCombine/InstCombineCalls.cpp     | 20 +++++++++++++------
 llvm/test/Transforms/InstCombine/fabs.ll | 13 ++++--------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 6a188f6a4da416..fa9c6e184e3858 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1230,13 +1230,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     break;
   }
   case Intrinsic::fabs: {
-    Value *Cond;
-    Constant *LHS, *RHS;
+    Value *Cond, *TVal, *FVal;
     if (match(II->getArgOperand(0),
-              m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
-      CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
-      CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
-      return SelectInst::Create(Cond, Call0, Call1);
+              m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
+      // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
+      if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
+        CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
+        CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
+        return SelectInst::Create(Cond, AbsT, AbsF);
+      }
+      // fabs (select Cond, -FVal, FVal) --> fabs FVal
+      if (match(TVal, m_FNeg(m_Specific(FVal))))
+        return replaceOperand(*II, 0, FVal);
+      // fabs (select Cond, TVal, -TVal) --> fabs TVal
+      if (match(FVal, m_FNeg(m_Specific(TVal))))
+        return replaceOperand(*II, 0, TVal);
     }
     LLVM_FALLTHROUGH;
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index c0d2c3b701f033..f8b70afea3803c 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -751,9 +751,7 @@ define half @select_fcmp_nnan_nsz_uge_negzero_unary_fneg(half %x) {

 define float @select_fneg(i1 %c, float %x) {
 ; CHECK-LABEL: @select_fneg(
-; CHECK-NEXT:    [[N:%.*]] = fneg float [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], float [[N]], float [[X]]
-; CHECK-NEXT:    [[FABS:%.*]] = call float @llvm.fabs.f32(float [[S]])
+; CHECK-NEXT:    [[FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %n = fneg float %x
@@ -766,8 +764,7 @@ define float @select_fneg_use1(i1 %c, float %x) {
 ; CHECK-LABEL: @select_fneg_use1(
 ; CHECK-NEXT:    [[N:%.*]] = fneg float [[X:%.*]]
 ; CHECK-NEXT:    call void @use(float [[N]])
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], float [[X]], float [[N]]
-; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[S]])
+; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %n = 
fneg float %x
@@ -782,7 +779,7 @@ define float @select_fneg_use2(i1 %c, float %x) {
 ; CHECK-NEXT:    [[N:%.*]] = fneg arcp float [[X:%.*]]
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], float [[N]], float [[X]]
 ; CHECK-NEXT:    call void @use(float [[S]])
-; CHECK-NEXT:    [[FABS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[S]])
+; CHECK-NEXT:    [[FABS:%.*]] = call nnan nsz float @llvm.fabs.f32(float [[X]])
 ; CHECK-NEXT:    ret float [[FABS]]
 ;
   %n = fneg arcp float %x
@@ -794,9 +791,7 @@ define float @select_fneg_use2(i1 %c, float %x) {

 define <2 x float> @select_fneg_vec(<2 x i1> %c, <2 x float> %x) {
 ; CHECK-LABEL: @select_fneg_vec(
-; CHECK-NEXT:    [[N:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[S:%.*]] = select fast <2 x i1> [[C:%.*]], <2 x float> [[X]], <2 x float> [[N]]
-; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[S]])
+; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
   %n = fneg <2 x float> %x

From 740332b6cce3e59dca4e50d3e2fd0d008f5e9529 Mon Sep 17 00:00:00 2001
From: Georgii Rymar
Date: Wed, 12 Aug 2020 16:54:49 +0300
Subject: [PATCH 039/101] [llvm-readobj/elf] - Refine testing of broken Android's packed relocation sections.

This uses the modern `split-file` tool to merge the 5 `packed-relocs-error*.s`
tests into a new `packed-relocs-errors.s` and adds testing for the GNU output
style.

Differential revision: https://reviews.llvm.org/D85835
---
 .../llvm-readobj/ELF/packed-relocs-error1.s   |  8 ---
 .../llvm-readobj/ELF/packed-relocs-error2.s   |  8 ---
 .../llvm-readobj/ELF/packed-relocs-error3.s   | 10 ---
 .../llvm-readobj/ELF/packed-relocs-error4.s   | 14 ----
 .../llvm-readobj/ELF/packed-relocs-error5.s   | 14 ----
 .../llvm-readobj/ELF/packed-relocs-errors.s   | 66 +++++++++++++++++++
 6 files changed, 66 insertions(+), 54 deletions(-)
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s
 delete mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s
 create mode 100644 llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s

diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s
deleted file mode 100644
index 07fbd78b09ece2..00000000000000
--- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error1.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// REQUIRES: x86-registered-target
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t
-
-// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: invalid packed relocation header
-
-.section .rela.dyn, "a", @0x60000001
-.ascii "APS9"
diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s
deleted file mode 100644
index ea14995e0ded11..00000000000000
--- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error2.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// REQUIRES: x86-registered-target
-// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t
-
-// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end
-
-.section 
.rela.dyn, "a", @0x60000001 -.ascii "APS2" diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s deleted file mode 100644 index 766c551295ae62..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error3.s +++ /dev/null @@ -1,10 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s deleted file mode 100644 index 191e0b7885c46e..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error4.s +++ /dev/null @@ -1,14 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset - -.sleb128 2 // Number of relocations in group -.sleb128 2 // RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG -.sleb128 8 // offset delta diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s deleted file mode 100644 index 8a6d6560f52056..00000000000000 --- a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-error5.s +++ /dev/null @@ -1,14 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t -// RUN: llvm-readobj --relocations %t 2>&1 | FileCheck %s -DFILE=%t - -// CHECK: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: relocation group unexpectedly large - -.section .rela.dyn, "a", @0x60000001 -.ascii "APS2" -.sleb128 4 // Number of relocations -.sleb128 0 // Initial offset - -.sleb128 5 // Number of relocations in group -.sleb128 2 // RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG -.sleb128 8 // offset delta diff --git a/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s new file mode 100644 index 00000000000000..4f2e65ed220f7a --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/packed-relocs-errors.s @@ -0,0 +1,66 @@ +# REQUIRES: x86-registered-target + +## Test that we report meaningful warnings when dumping +## broken Android's packed relocation sections. 
+ +# RUN: split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm1.s -o %t1.o +# RUN: llvm-readobj --relocations %t1.o 2>&1 | FileCheck %s -DFILE=%t1.o --check-prefix=ERR-HEADER +# RUN: llvm-readelf --relocations %t1.o 2>&1 | FileCheck %s -DFILE=%t1.o --check-prefix=ERR-HEADER + +#--- asm1.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS9" + +# ERR-HEADER: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: invalid packed relocation header + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm2.s -o %t2.o +# RUN: llvm-readobj --relocations %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t2.o 2>&1 | FileCheck %s -DFILE=%t2.o --check-prefix=ERR-PAST-END + +#--- asm2.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" + +# ERR-PAST-END: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: malformed sleb128, extends past end + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm3.s -o %t3.o +# RUN: llvm-readobj --relocations %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t3.o 2>&1 | FileCheck %s -DFILE=%t3.o --check-prefix=ERR-PAST-END + +#--- asm3.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm4.s -o %t4.o +# RUN: llvm-readobj --relocations %t4.o 2>&1 | FileCheck %s -DFILE=%t4.o --check-prefix=ERR-PAST-END +# RUN: llvm-readelf --relocations %t4.o 2>&1 | FileCheck %s -DFILE=%t4.o --check-prefix=ERR-PAST-END + +#--- asm4.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +.sleb128 2 ## Number of relocations in group +.sleb128 2 ## RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG +.sleb128 8 ## offset delta + +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t/asm5.s -o %t5.o +# RUN: llvm-readobj --relocations %t5.o 2>&1 | FileCheck %s -DFILE=%t5.o --check-prefix=ERR-LARGE +# RUN: llvm-readelf --relocations %t5.o 2>&1 | FileCheck %s -DFILE=%t5.o --check-prefix=ERR-LARGE + +# ERR-LARGE: warning: '[[FILE]]': unable to read relocations from SHT_ANDROID_REL section with index 3: relocation group unexpectedly large + +#--- asm5.s +.section .rela.dyn, "a", @0x60000001 +.ascii "APS2" +.sleb128 4 ## Number of relocations +.sleb128 0 ## Initial offset + +.sleb128 5 ## Number of relocations in group +.sleb128 2 ## RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG +.sleb128 8 ## offset delta From 3ba7777b94d887af594ba8d6c1378166bd361a20 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 25 Jul 2020 13:21:31 -0400 Subject: [PATCH 040/101] AMDGPU/GlobalISel: Fix selection of s1/s16 G_[F]CONSTANT The code to determine the value size was overcomplicated and only correct in the case where the result register already had a register class assigned. We can always take the size directly from the register's type. 
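
To illustrate the point, here is a minimal sketch of the new size query
(illustration only, not the verbatim change; the real code is in the diff
below):

```
// Sketch: the LLT of the destination register always knows the size,
// even for an unconstrained s1 or s16 virtual register.
Register DstReg = I.getOperand(0).getReg();
unsigned Size = MRI->getType(DstReg).getSizeInBits();

// The old path went through the register class instead, which is only
// valid once a class has actually been assigned:
//   const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg);
//   unsigned Size = TRI.getRegSizeInBits(*RC);
```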
--- .../AMDGPU/AMDGPUInstructionSelector.cpp | 33 +- .../GlobalISel/divergent-control-flow.ll | 26 +- .../AMDGPU/GlobalISel/inst-select-and.mir | 34 ++- .../GlobalISel/inst-select-constant.mir | 282 +++++++++++++----- .../GlobalISel/inst-select-fconstant.mir | 33 +- .../AMDGPU/GlobalISel/inst-select-or.mir | 34 ++- .../AMDGPU/GlobalISel/inst-select-xor.mir | 34 ++- 7 files changed, 318 insertions(+), 158 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 7e842835a5b44d..f2ecc50d472e54 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2043,6 +2043,8 @@ bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { MachineBasicBlock *BB = I.getParent(); MachineOperand &ImmOp = I.getOperand(1); + Register DstReg = I.getOperand(0).getReg(); + unsigned Size = MRI->getType(DstReg).getSizeInBits(); // The AMDGPU backend only supports Imm operands and not CImm or FPImm. if (ImmOp.isFPImm()) { @@ -2050,26 +2052,27 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const { ImmOp.ChangeToImmediate(Imm.getZExtValue()); } else if (ImmOp.isCImm()) { ImmOp.ChangeToImmediate(ImmOp.getCImm()->getSExtValue()); + } else { + llvm_unreachable("Not supported by g_constants"); } - Register DstReg = I.getOperand(0).getReg(); - unsigned Size; - bool IsSgpr; - const RegisterBank *RB = MRI->getRegBankOrNull(I.getOperand(0).getReg()); - if (RB) { - IsSgpr = RB->getID() == AMDGPU::SGPRRegBankID; - Size = MRI->getType(DstReg).getSizeInBits(); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID; + + unsigned Opcode; + if (DstRB->getID() == AMDGPU::VCCRegBankID) { + Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; } else { - const TargetRegisterClass *RC = TRI.getRegClassForReg(*MRI, DstReg); - IsSgpr = TRI.isSGPRClass(RC); - Size = TRI.getRegSizeInBits(*RC); - } + Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - if (Size != 32 && Size != 64) - return false; + // We should never produce s1 values on banks other than VCC. If the user of + // this already constrained the register, we may incorrectly think it's VCC + // if it wasn't originally. + if (Size == 1) + return false; + } - unsigned Opcode = IsSgpr ? 
AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; - if (Size == 32) { + if (Size != 64) { I.setDesc(TII.get(Opcode)); I.addImplicitDefUseOperands(*MF); return constrainSelectedInstRegOperands(I, TII, TRI, RBI); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index 1f9c3bc60876e2..7564251c755d93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -135,24 +135,24 @@ define void @constrained_if_register_class() { ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_cmp_lg_u32 s4, 0 -; CHECK-NEXT: s_cselect_b32 s5, 1, 0 -; CHECK-NEXT: s_xor_b32 s5, s5, -1 -; CHECK-NEXT: s_and_b32 s5, s5, 1 -; CHECK-NEXT: s_mov_b32 s4, -1 -; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_cselect_b32 s4, 1, 0 +; CHECK-NEXT: s_xor_b32 s4, s4, -1 +; CHECK-NEXT: s_and_b32 s4, s4, 1 +; CHECK-NEXT: s_cmp_lg_u32 s4, 0 ; CHECK-NEXT: s_cbranch_scc0 BB4_6 ; CHECK-NEXT: ; %bb.1: ; %bb2 -; CHECK-NEXT: s_getpc_b64 s[6:7] -; CHECK-NEXT: s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4 -; CHECK-NEXT: s_addc_u32 s7, s7, const.ptr@gotpcrel32@hi+4 -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+4 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v0, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s7 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 ; CHECK-NEXT: flat_load_dword v0, v[0:1] -; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 +; CHECK-NEXT: s_mov_b32 s4, -1 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0 ; CHECK-NEXT: s_xor_b64 s[8:9], vcc, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir index 81437acbbbc53b..7907608432ff19 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-and.mir @@ -420,34 +420,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: 
copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_AND_B32_1]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -466,34 +467,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_AND_B32_1]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 
%7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir index c8762c0d578eb0..20b886ebdadfa3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE64 +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=WAVE32 --- name: constant_v_s32 @@ -9,13 +10,21 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_v_s32 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec - ; GCN: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE64-LABEL: name: constant_v_s32 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE32-LABEL: name: constant_v_s32 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] %0:vgpr(s32) = G_CONSTANT i32 0 %1:vgpr(s32) = G_CONSTANT i32 1 %2:vgpr(s32) = G_CONSTANT i32 -1 @@ -32,13 +41,21 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_s_s32 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 - ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE64-LABEL: name: constant_s_s32 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; 
WAVE64: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE32-LABEL: name: constant_s_s32 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] %0:sgpr(s32) = G_CONSTANT i32 0 %1:sgpr(s32) = G_CONSTANT i32 1 %2:sgpr(s32) = G_CONSTANT i32 -1 @@ -47,22 +64,67 @@ body: | S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 ... -# FIXME -# --- -# name: constant_v_s16 -# legalized: true -# regBankSelected: true -# tracksRegLiveness: true - -# body: | -# bb.0: -# %0:vgpry(s16) = G_CONSTANT i16 0 -# %1:vgpr(s16) = G_CONSTANT i16 1 -# %2:vgpr(s16) = G_CONSTANT i16 -1 -# %3:vgpr(s16) = G_CONSTANT i16 -54 -# %4:vgpr(s16) = G_CONSTANT i16 27 -# S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 -# ... +--- +name: constant_v_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_v_s16 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + ; WAVE32-LABEL: name: constant_v_s16 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -54, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]], implicit [[V_MOV_B32_e32_4]] + %0:vgpr(s16) = G_CONSTANT i16 0 + %1:vgpr(s16) = G_CONSTANT i16 1 + %2:vgpr(s16) = G_CONSTANT i16 -1 + %3:vgpr(s16) = G_CONSTANT i16 -54 + %4:vgpr(s16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... 
+ +--- +name: constant_s_s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_s_s16 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + ; WAVE32-LABEL: name: constant_s_s16 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 -54 + ; WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 27 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]], implicit [[S_MOV_B32_4]] + %0:sgpr(s16) = G_CONSTANT i16 0 + %1:sgpr(s16) = G_CONSTANT i16 1 + %2:sgpr(s16) = G_CONSTANT i16 -1 + %3:sgpr(s16) = G_CONSTANT i16 -54 + %4:sgpr(s16) = G_CONSTANT i16 27 + S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4 +... --- name: constant_v_s64 @@ -72,32 +134,59 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_v_s64 - ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GCN: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec - ; GCN: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec - ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec - ; GCN: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec - ; GCN: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 - ; GCN: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GCN: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 - ; GCN: 
[[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec - ; GCN: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec - ; GCN: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; WAVE64-LABEL: name: constant_v_s64 + ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE64: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE64: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 + ; WAVE64: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec + ; WAVE64: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; WAVE64: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 + ; WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] + ; WAVE32-LABEL: name: constant_v_s64 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, 
[[V_MOV_B32_e32_1]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_2]], %subreg.sub0, [[V_MOV_B32_e32_3]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_5:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_4]], %subreg.sub0, [[V_MOV_B32_e32_5]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_6:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967242, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_7:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; WAVE32: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_8:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 27, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_9:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_8]], %subreg.sub0, [[V_MOV_B32_e32_9]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_10:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_11:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[REG_SEQUENCE5:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_10]], %subreg.sub0, [[V_MOV_B32_e32_11]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_12:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_13:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; WAVE32: [[REG_SEQUENCE6:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_12]], %subreg.sub0, [[V_MOV_B32_e32_13]], %subreg.sub1 + ; WAVE32: [[V_MOV_B32_e32_14:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 23255, implicit $exec + ; WAVE32: [[V_MOV_B32_e32_15:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec + ; WAVE32: [[REG_SEQUENCE7:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_14]], %subreg.sub0, [[V_MOV_B32_e32_15]], %subreg.sub1 + ; WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]], implicit [[REG_SEQUENCE4]], implicit [[REG_SEQUENCE5]], implicit [[REG_SEQUENCE6]], implicit [[REG_SEQUENCE7]] %0:vgpr(s64) = G_CONSTANT i64 0 %1:vgpr(s64) = G_CONSTANT i64 1 %2:vgpr(s64) = G_CONSTANT i64 -1 @@ -117,24 +206,43 @@ tracksRegLiveness: true body: | bb.0: - ; GCN-LABEL: name: constant_s_s64 - ; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 - ; GCN: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 - ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 - ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GCN: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 - ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 - ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 - ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 - ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = 
S_MOV_B32 23255 - ; GCN: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 - ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; WAVE64-LABEL: name: constant_s_s64 + ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE64: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 + ; WAVE64: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 + ; WAVE64: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; WAVE64: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 + ; WAVE64: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; WAVE64: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 + ; WAVE64: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE64: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE64: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 + ; WAVE64: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255 + ; WAVE64: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; WAVE64: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; WAVE32-LABEL: name: constant_s_s64 + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE32: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 1 + ; WAVE32: [[S_MOV_B64_2:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967242 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 + ; WAVE32: [[S_MOV_B64_3:%[0-9]+]]:sreg_64 = S_MOV_B64 27 + ; WAVE32: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967295 + ; WAVE32: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_2]], %subreg.sub0, [[S_MOV_B32_3]], %subreg.sub1 + ; WAVE32: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; WAVE32: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_4]], %subreg.sub0, [[S_MOV_B32_5]], %subreg.sub1 + ; WAVE32: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 23255 + ; WAVE32: [[S_MOV_B32_7:%[0-9]+]]:sreg_32 = S_MOV_B32 -16 + ; WAVE32: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_6]], %subreg.sub0, [[S_MOV_B32_7]], %subreg.sub1 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]], implicit [[S_MOV_B64_2]], implicit [[REG_SEQUENCE]], implicit [[S_MOV_B64_3]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:sgpr(s64) = G_CONSTANT i64 0 %1:sgpr(s64) = G_CONSTANT i64 1 %2:sgpr(s64) = G_CONSTANT i64 -1 @@ -145,3 +253,27 @@ body: | 
%7:sgpr(s64) = G_CONSTANT i64 18446744004990098135 S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 ... + +--- + +name: constant_i1_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + ; WAVE64-LABEL: name: constant_i1_vcc + ; WAVE64: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; WAVE64: [[S_MOV_B64_1:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; WAVE64: S_ENDPGM 0, implicit [[S_MOV_B64_]], implicit [[S_MOV_B64_1]] + ; WAVE32-LABEL: name: constant_i1_vcc + ; WAVE32: $vcc_hi = IMPLICIT_DEF + ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; WAVE32: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]] + %0:vcc(s1) = G_CONSTANT i1 true + %1:vcc(s1) = G_CONSTANT i1 false + S_ENDPGM 0 , implicit %0 , implicit %1 + +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir index 9afa4b08c0ecb8..96e65617e33608 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fconstant.mir @@ -14,12 +14,16 @@ body: | ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1090519040, implicit $exec - ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]], implicit [[V_MOV_B32_e32_1]], implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] + ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]] + ; GCN: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s32) = G_FCONSTANT float 1.0 %1:vgpr(s32) = G_FCONSTANT float 8.0 %2:vgpr(s32) = G_FCONSTANT float 1.0 %3:vgpr(s32) = G_FCONSTANT float 8.0 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + $vgpr0 = COPY %0 + $vgpr1 = COPY %1 + S_ENDPGM 0, implicit %2 , implicit %3 ... --- @@ -37,14 +41,14 @@ body: | ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 3238002688 ; GCN: $sgpr0 = COPY [[S_MOV_B32_]] ; GCN: $sgpr1 = COPY [[S_MOV_B32_1]] - ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]], implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] + ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s32) = G_FCONSTANT float 1.0 %1:sgpr(s32) = G_FCONSTANT float 8.0 %2:sgpr(s32) = G_FCONSTANT float -1.0 %3:sgpr(s32) = G_FCONSTANT float -8.0 $sgpr0 = COPY %0 $sgpr1 = COPY %1 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + S_ENDPGM 0, implicit %2 , implicit %3 ... @@ -71,14 +75,14 @@ body: | ; GCN: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_6]], %subreg.sub0, [[V_MOV_B32_e32_7]], %subreg.sub1 ; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]] ; GCN: $vgpr2_vgpr3 = COPY [[REG_SEQUENCE1]] - ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]], implicit [[REG_SEQUENCE1]], implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] + ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE2]], implicit [[REG_SEQUENCE3]] %0:vgpr(s64) = G_FCONSTANT double 1.0 %1:vgpr(s64) = G_FCONSTANT double 8.0 %2:vgpr(s64) = G_FCONSTANT double -2.0 %3:vgpr(s64) = G_FCONSTANT double 10.0 $vgpr0_vgpr1 = COPY %0 $vgpr2_vgpr3 = COPY %1 - S_ENDPGM 0, implicit %0 , implicit %1 , implicit %2 , implicit %3 + S_ENDPGM 0, implicit %2 , implicit %3 ... 
@@ -122,14 +126,22 @@ body: | ; GCN-LABEL: name: fconstant_v_s16 ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec + ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec + ; GCN: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 18432, implicit $exec ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GCN: $vgpr1 = COPY [[V_MOV_B32_e32_1]] + ; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_2]], implicit [[V_MOV_B32_e32_3]] %0:vgpr(s16) = G_FCONSTANT half 1.0 %1:vgpr(s16) = G_FCONSTANT half 8.0 %2:vgpr(s32) = G_ANYEXT %0 %3:vgpr(s32) = G_ANYEXT %1 + + ; Test without already assigned register class + %4:vgpr(s16) = G_FCONSTANT half 1.0 + %5:vgpr(s16) = G_FCONSTANT half 8.0 $vgpr0 = COPY %2 $vgpr1 = COPY %3 + S_ENDPGM 0, implicit %4, implicit %5 ... @@ -146,14 +158,21 @@ body: | ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]] + ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 15360 + ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 18432 ; GCN: $sgpr0 = COPY [[COPY]] ; GCN: $sgpr1 = COPY [[COPY1]] + ; GCN: S_ENDPGM 0, implicit [[S_MOV_B32_2]], implicit [[S_MOV_B32_3]] %0:sgpr(s16) = G_FCONSTANT half 1.0 %1:sgpr(s16) = G_FCONSTANT half 8.0 %2:vgpr(s32) = G_ANYEXT %0 %3:vgpr(s32) = G_ANYEXT %1 + + ; Test without already assigned register class + %4:sgpr(s16) = G_FCONSTANT half 1.0 + %5:sgpr(s16) = G_FCONSTANT half 8.0 $sgpr0 = COPY %2 $sgpr1 = COPY %3 + S_ENDPGM 0, implicit %4, implicit %5 ... - diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir index 7f1f52d2c522ac..966bb8c629500a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-or.mir @@ -420,34 +420,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], 
implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_OR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -466,34 +467,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_OR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_OR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir index f923a4c9f02b81..0364cb736c601a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-xor.mir @@ -421,34 +421,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; 
WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave32 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[S_XOR_B32_]] ; WAVE32: S_ENDPGM 0, implicit [[COPY1]] %1:vgpr(s32) = COPY $vgpr0 %0:vgpr(s1) = G_TRUNC %1(s32) - %2:sgpr(s1) = G_CONSTANT i1 true + %sgpr0:sgpr(s32) = COPY $sgpr0 + %2:sgpr(s1) = G_TRUNC %sgpr0 %6:sgpr(s32) = G_CONSTANT i32 0 %7:sgpr(p1) = G_IMPLICIT_DEF %9:vcc(s1) = COPY %0(s1) @@ -467,34 +468,35 @@ regBankSelected: true tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr0 ; WAVE64-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE64: liveins: $vgpr0 + ; WAVE64: liveins: $vgpr0, $sgpr0 ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; WAVE64: %sgpr0:sreg_32 = COPY $sgpr0 ; WAVE64: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec - ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc + ; WAVE64: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec ; WAVE64: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc ; WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B64_]] ; WAVE64: S_ENDPGM 0, implicit [[COPY1]] ; WAVE32-LABEL: name: copy_select_constrain_vcc_result_reg_wave64 - ; WAVE32: liveins: $vgpr0 + ; WAVE32: liveins: $vgpr0, $sgpr0 ; WAVE32: $vcc_hi = IMPLICIT_DEF ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; 
WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; WAVE32: %sgpr0:sreg_32 = COPY $sgpr0
    ; WAVE32: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
    ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[V_AND_B32_e32_]], implicit $exec
-    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE32: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, %sgpr0, implicit-def $scc
    ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
    ; WAVE32: [[S_XOR_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_XOR_B32 [[V_CMP_NE_U32_e64_]], [[V_CMP_NE_U32_e64_1]], implicit-def dead $scc
    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY [[S_XOR_B32_]]
    ; WAVE32: S_ENDPGM 0, implicit [[COPY1]]
    %1:vgpr(s32) = COPY $vgpr0
    %0:vgpr(s1) = G_TRUNC %1(s32)
-    %2:sgpr(s1) = G_CONSTANT i1 true
+    %sgpr0:sgpr(s32) = COPY $sgpr0
+    %2:sgpr(s1) = G_TRUNC %sgpr0
    %6:sgpr(s32) = G_CONSTANT i32 0
    %7:sgpr(p1) = G_IMPLICIT_DEF
    %9:vcc(s1) = COPY %0(s1)

From 2f5f5febf3e4fa9bc80e8a8f63a99d3e6813c499 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 26 Jul 2020 15:43:48 -0400
Subject: [PATCH 041/101] AMDGPU/GlobalISel: Select llvm.amdgcn.groupstaticsize

Previously, this would select successfully but then assert while expanding
the pseudoinstruction if the OS was neither HSA nor PAL. We don't need the
pseudoinstruction anymore since we know the total size after legalization.
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 29 ++++++++++++
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |  1 +
 .../inst-select-amdgcn.groupstaticsize.mir    | 46 +++++++++++++++++++
 .../AMDGPU/llvm.amdgcn.groupstaticsize.ll     |  4 ++
 4 files changed, 80 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f2ecc50d472e54..c9f9eb6988f15a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -930,6 +930,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return selectBallot(I);
   case Intrinsic::amdgcn_reloc_constant:
     return selectRelocConstant(I);
+  case Intrinsic::amdgcn_groupstaticsize:
+    return selectGroupStaticSize(I);
   case Intrinsic::returnaddress:
     return selectReturnAddress(I);
   default:
@@ -1137,6 +1139,33 @@ bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
+  Triple::OSType OS = MF->getTarget().getTargetTriple().getOS();
+
+  Register DstReg = I.getOperand(0).getReg();
+  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
+  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
+    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+
+  MachineBasicBlock *MBB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Mov), DstReg);
+
+  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
+    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+    MIB.addImm(MFI->getLDSSize());
+  } else {
+    Module *M = MF->getFunction().getParent();
+    const GlobalValue *GV
+      = Intrinsic::getDeclaration(M, Intrinsic::amdgcn_groupstaticsize);
+    MIB.addGlobalAddress(GV, 0, SIInstrInfo::MO_ABS32_LO);
+  }
+
+  I.eraseFromParent();
+  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
 bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
   MachineBasicBlock *MBB = I.getParent();
   MachineFunction &MF = *MBB->getParent();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index b18867299baf9a..969ef59363085f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -110,6 +110,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectBallot(MachineInstr &I) const;
   bool selectRelocConstant(MachineInstr &I) const;
+  bool selectGroupStaticSize(MachineInstr &I) const;
   bool selectReturnAddress(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir
new file mode 100644
index 00000000000000..4e45fe689dd74f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.groupstaticsize.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSAPAL %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefix=MESA %s
+
+---
+name: groupstaticsize_v
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  ldsSize: 4096
+
+body: |
+  bb.0:
+
+    ; HSAPAL-LABEL: name: groupstaticsize_v
+    ; HSAPAL: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
+    ; HSAPAL: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    ; MESA-LABEL: name: groupstaticsize_v
+    ; MESA: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize, implicit $exec
+    ; MESA: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize)
+    S_ENDPGM 0, implicit %0
+...
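+
+# groupstaticsize_s below exercises the SGPR destination bank: the selector
+# picks S_MOV_B32 instead of V_MOV_B32_e32 and, as above, materializes either
+# the ldsSize value from machineFunctionInfo (amdhsa/amdpal) or an
+# amdgpu-abs32-lo relocation against the intrinsic symbol (mesa3d).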
+ +--- +name: groupstaticsize_s +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + ldsSize: 1024 + +body: | + bb.0: + + ; HSAPAL-LABEL: name: groupstaticsize_s + ; HSAPAL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; HSAPAL: S_ENDPGM 0, implicit [[S_MOV_B32_]] + ; MESA-LABEL: name: groupstaticsize_s + ; MESA: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @llvm.amdgcn.groupstaticsize + ; MESA: S_ENDPGM 0, implicit [[S_MOV_B32_]] + %0:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.groupstaticsize) + S_ENDPGM 0, implicit %0 +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll index 3224d8a3594ad8..db4032efceabb3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ -2,6 +2,10 @@ ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,NOHSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,HSA %s + @lds0 = addrspace(3) global [512 x float] undef, align 4 @lds1 = addrspace(3) global [256 x float] undef, align 4 From d5621b83a58c1faaa0e413ac7c4f0ca8811d0c61 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Aug 2020 14:52:23 +0100 Subject: [PATCH 042/101] [X86][AVX] lowerShuffleWithVTRUNC - pull out TRUNCATE/VTRUNC creation into helper code. NFCI. Prep work toward adding v16i16/v32i8 support for lowerShuffleWithVTRUNC and improving lowerShuffleWithVPMOV. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 +++++++++++++++---------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 27dee97edb2fd8..6238366f6c268c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11286,6 +11286,37 @@ static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT, return false; } +// Helper to create TRUNCATE/VTRUNC nodes, optionally with zero/undef upper +// element padding to the final DstVT. +static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src, + const X86Subtarget &Subtarget, + SelectionDAG &DAG, bool ZeroUppers) { + MVT SrcVT = Src.getSimpleValueType(); + unsigned NumDstElts = DstVT.getVectorNumElements(); + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + + // Perform a direct ISD::TRUNCATE if possible. + if (NumSrcElts == NumDstElts) + return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src); + + if (NumSrcElts > NumDstElts) { + MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src); + return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits()); + } + + // Non-VLX targets must truncate from a 512-bit type, so we need to + // widen, truncate and then possibly extract the original subvector. 
+  if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
+    SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
+    return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
+  }
+
+  // Fallback to a X86ISD::VTRUNC.
+  // TODO: Handle cases where we go from 512-bit vectors to sub-128-bit vectors.
+  return DAG.getNode(X86ISD::VTRUNC, DL, DstVT, Src);
+}
+
 static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
                                 int Delta) {
   int Size = (int)Mask.size();
@@ -11388,7 +11419,7 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
 
   unsigned NumElts = VT.getVectorNumElements();
   unsigned EltSizeInBits = VT.getScalarSizeInBits();
-  unsigned MaxScale = 64 / VT.getScalarSizeInBits();
+  unsigned MaxScale = 64 / EltSizeInBits;
   for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
     // TODO: Support non-BWI VPMOVWB truncations?
     unsigned SrcEltBits = EltSizeInBits * Scale;
@@ -11408,36 +11439,18 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
     if (UpperElts > 0 &&
         !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
       continue;
+    bool UndefUppers =
+        UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
 
     // As we're using both sources, we need to concat them together
-    // and truncate from the 256-bit src.
+    // and truncate from the double-sized src.
     MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
     SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
     MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
     MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
     Src = DAG.getBitcast(SrcVT, Src);
-
-    if (SrcVT.getVectorNumElements() == NumElts)
-      return DAG.getNode(ISD::TRUNCATE, DL, VT, Src);
-
-    if (!Subtarget.hasVLX()) {
-      // Non-VLX targets must truncate from a 512-bit type, so we need to
-      // widen, truncate and then possibly extract the original 128-bit
-      // vector.
-      bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
-      Src = widenSubVector(Src, !UndefUppers, Subtarget, DAG, DL, 512);
-      unsigned NumWideSrcElts = Src.getValueType().getVectorNumElements();
-      if (NumWideSrcElts >= NumElts) {
-        // Widening means we can now use a regular TRUNCATE.
-        MVT WideVT = MVT::getVectorVT(VT.getScalarType(), NumWideSrcElts);
-        SDValue WideRes = DAG.getNode(ISD::TRUNCATE, DL, WideVT, Src);
-        if (!WideVT.is128BitVector())
-          WideRes = extract128BitVector(WideRes, 0, DAG, DL);
-        return WideRes;
-      }
-    }
-    return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
+    return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
   }
 
   return SDValue();

From c98fcba55cf615b078b3943ee9e65356ba23414b Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 18 Aug 2020 10:14:07 -0400
Subject: [PATCH 043/101] [SLP] remove instcombine dependency from regression
 test; NFC

InstCombine doesn't do that much here - sinks some instructions and improves
alignments - but that should not be part of the SLP pass unit testing.
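
For illustration, the alignment improvement is the main thing InstCombine
contributed here, and the updated test now encodes it by hand. A minimal
sketch of that inference (hypothetical global @g, not taken from the test):

  @g = global [4 x i32] zeroinitializer, align 16

  define i32 @load_first_elt() {
    ; The pointer is the start of @g, so InstCombine raises this "align 4"
    ; to the global's known 16-byte alignment.
    %v = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @g, i64 0, i64 0), align 4
    ret i32 %v
  }

Running opt -instcombine over this rewrites the load to "align 16"; the same
reasoning explains the "align 16" and "align 8" annotations now written
directly into the test IR below.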
--- .../Transforms/SLPVectorizer/X86/limit.ll | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll index 41db490a754f6b..e6d78c0c0e378e 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/limit.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/limit.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s --instcombine -slp-vectorizer -S | FileCheck %s +; RUN: opt < %s -slp-vectorizer -S | FileCheck %s + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -31,40 +32,40 @@ define void @addsub() { ; CHECK-NEXT: ret void ; entry: - %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 0), align 4 - %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 0), align 4 - %add = add nsw i32 %0, %1 br label %bb1 -bb1: ; preds = %entry - %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 0), align 4 - %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 0), align 4 + +bb1: + %0 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 0), align 16 + %1 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 0), align 16 + %add = add nsw i32 %0, %1 + %2 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 0), align 16 + %3 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 0), align 16 %add1 = add nsw i32 %2, %3 %add2 = add nsw i32 %add, %add1 - store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 0), align 4 - %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 1), align 4 - %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 1), align 4 + store i32 %add2, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 0), align 16 + %4 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 1), align 4 + %5 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 1), align 4 %add3 = add nsw i32 %4, %5 - %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 1), align 4 - %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 1), align 4 + %6 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 1), align 4 + %7 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 1), align 4 %add4 = add nsw i32 %6, %7 %sub = sub nsw i32 %add3, %add4 - store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 1), align 4 - %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 2), align 4 - %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 2), align 4 + store i32 %sub, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 1), align 4 + %8 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 2), align 8 + %9 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 2), align 8 %add5 = add nsw i32 %8, %9 - %10 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 2), align 4 - %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 2), align 4 + %10 = load i32, i32* 
getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 2), align 8 + %11 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 2), align 8 %add6 = add nsw i32 %10, %11 %add7 = add nsw i32 %add5, %add6 - store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 2), align 4 - %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i32 0, i64 3), align 4 - %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i32 0, i64 3), align 4 + store i32 %add7, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 2), align 8 + %12 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i64 3), align 4 + %13 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @c, i64 0, i64 3), align 4 %add8 = add nsw i32 %12, %13 - %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i32 0, i64 3), align 4 - %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i32 0, i64 3), align 4 + %14 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @d, i64 0, i64 3), align 4 + %15 = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @e, i64 0, i64 3), align 4 %add9 = add nsw i32 %14, %15 %sub10 = sub nsw i32 %add8, %add9 - store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i32 0, i64 3), align 4 + store i32 %sub10, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @a, i64 0, i64 3), align 4 ret void } - From 011bf4fd9679c8a7dd7e3a6fc9a696e417ce3c53 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Aug 2020 15:24:28 +0100 Subject: [PATCH 044/101] [X86][AVX] lowerShuffleWithVTRUNC - extend to support v16i16/v32i8 binary shuffles. This requires a few additional SrcVT vs DstVT padding cases in getAVX512TruncNode. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 34 ++- llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 251 ++---------------- .../CodeGen/X86/vector-shuffle-256-v32.ll | 20 +- .../CodeGen/X86/x86-interleaved-access.ll | 53 ++-- 4 files changed, 79 insertions(+), 279 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6238366f6c268c..0fbabdc5dfdf0b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -11292,19 +11292,28 @@ static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src, const X86Subtarget &Subtarget, SelectionDAG &DAG, bool ZeroUppers) { MVT SrcVT = Src.getSimpleValueType(); + MVT DstSVT = DstVT.getScalarType(); unsigned NumDstElts = DstVT.getVectorNumElements(); unsigned NumSrcElts = SrcVT.getVectorNumElements(); + unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits(); // Perform a direct ISD::TRUNCATE if possible. if (NumSrcElts == NumDstElts) return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src); if (NumSrcElts > NumDstElts) { - MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts); + MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src); return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits()); } + if ((NumSrcElts * DstEltSizeInBits) >= 128) { + MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src); + return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL, + DstVT.getSizeInBits()); + } + // Non-VLX targets must truncate from a 512-bit type, so we need to // widen, truncate and then possibly extract the original subvector. 
   if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
@@ -11312,9 +11321,13 @@ static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
     return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
   }
 
-  // Fallback to a X86ISD::VTRUNC.
-  // TODO: Handle cases where we go from 512-bit vectors to sub-128-bit vectors.
-  return DAG.getNode(X86ISD::VTRUNC, DL, DstVT, Src);
+  // Fallback to a X86ISD::VTRUNC, padding if necessary.
+  MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits);
+  SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
+  if (DstVT != TruncVT)
+    Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
+                           DstVT.getSizeInBits());
+  return Trunc;
 }
 
 static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
@@ -11413,7 +11426,8 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
                                     const APInt &Zeroable,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
-  assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
+  assert((VT.is128BitVector() || VT.is256BitVector()) &&
+         "Unexpected VTRUNC type");
   if (!Subtarget.hasAVX512())
     return SDValue();
 
@@ -16893,6 +16907,11 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                   Subtarget))
       return V;
 
+  // Try to lower using a truncation.
+  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
+                                       Subtarget, DAG))
+    return V;
+
   // Try to use shift instructions.
   if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
                                           Zeroable, Subtarget, DAG))
@@ -17003,6 +17022,11 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                                  Subtarget))
       return V;
 
+  // Try to lower using a truncation.
+  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
+                                       Subtarget, DAG))
+    return V;
+
   // Try to use shift instructions.
if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index 6c5f5125109dbd..de13135ebb5310 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -176,89 +176,12 @@ define void @trunc_v8i64_to_v8i32(<16 x i32>* %L, <8 x i32>* %S) nounwind { } define void @shuffle_v64i8_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind { -; AVX512F-LABEL: shuffle_v64i8_to_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512F-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VL-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VL-NEXT: vpmovdb %ymm1, %xmm1 -; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v64i8_to_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512BWVL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BWVL-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BWVL-NEXT: vpmovdb %ymm1, %xmm1 -; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512BWVL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512VBMI-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512VBMI-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMI-NEXT: vpmovdb %zmm1, %xmm1 -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VBMI-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VBMI-NEXT: vzeroupper -; AVX512VBMI-NEXT: retq 
-; -; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v16i8: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60] -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMIVL-NEXT: vpermt2b 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v64i8_to_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovdb %zmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %L %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <16 x i32> store <16 x i8> %strided.vec, <16 x i8>* %S @@ -280,80 +203,12 @@ define void @trunc_v16i32_to_v16i8(<64 x i8>* %L, <16 x i8>* %S) nounwind { } define void @shuffle_v32i16_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind { -; AVX512F-LABEL: shuffle_v32i16_to_v8i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512F-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,0,1,8,9,8,9,10,11,12,13,14,15] -; AVX512VL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VL-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1 -; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v32i16_to_v8i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512BW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28] -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BWVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1 -; AVX512BWVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpblendw {{.*#+}} xmm1 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpblendw {{.*#+}} xmm0 = mem[0],xmm0[1,2,3],mem[4],xmm0[5,6,7] -; AVX512VBMI-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMI-NEXT: vpmovqw %zmm1, %xmm1 -; AVX512VBMI-NEXT: vpblendd 
{{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX512VBMI-NEXT: vmovdqa %xmm0, (%rsi) -; AVX512VBMI-NEXT: vzeroupper -; AVX512VBMI-NEXT: retq -; -; AVX512VBMIVL-LABEL: shuffle_v32i16_to_v8i16: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,4,8,12,16,20,24,28] -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMIVL-NEXT: vpermt2w 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovdqa %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v32i16_to_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovqw %zmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <32 x i16>, <32 x i16>* %L %strided.vec = shufflevector <32 x i16> %vec, <32 x i16> undef, <8 x i32> store <8 x i16> %strided.vec, <8 x i16>* %S @@ -375,81 +230,13 @@ define void @trunc_v8i64_to_v8i16(<32 x i16>* %L, <8 x i16>* %S) nounwind { } define void @shuffle_v64i8_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind { -; AVX512F-LABEL: shuffle_v64i8_to_v8i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512F-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512F-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; AVX512F-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512F-NEXT: vpmovqb %zmm1, %xmm1 -; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] -; AVX512F-NEXT: vmovq %xmm0, (%rsi) -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VL-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512VL-NEXT: vpmovqb %ymm1, %xmm1 -; AVX512VL-NEXT: vpmovqb %ymm0, %xmm0 -; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512VL-NEXT: vmovq %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v64i8_to_v8i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512BW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512BW-NEXT: vpmovqb %zmm1, %xmm1 -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] -; AVX512BW-NEXT: vmovq %xmm0, (%rsi) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BWVL-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BWVL-NEXT: vpmovqb %ymm1, %xmm1 -; AVX512BWVL-NEXT: vpmovqb %ymm0, %xmm0 -; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BWVL-NEXT: vmovq %xmm0, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq -; -; AVX512VBMI-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = -; AVX512VBMI-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; AVX512VBMI-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512VBMI-NEXT: vpshufb %xmm1, %xmm2, %xmm1 -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; AVX512VBMI-NEXT: vmovdqa (%rdi), %ymm1 -; AVX512VBMI-NEXT: vpmovqb %zmm1, %xmm1 -; 
AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3] -; AVX512VBMI-NEXT: vmovq %xmm0, (%rsi) -; AVX512VBMI-NEXT: vzeroupper -; AVX512VBMI-NEXT: retq -; -; AVX512VBMIVL-LABEL: shuffle_v64i8_to_v8i8: -; AVX512VBMIVL: # %bb.0: -; AVX512VBMIVL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VBMIVL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4048780183313844224,4048780183313844224,4048780183313844224,4048780183313844224] -; AVX512VBMIVL-NEXT: vpermi2b 32(%rdi), %ymm0, %ymm1 -; AVX512VBMIVL-NEXT: vmovq %xmm1, (%rsi) -; AVX512VBMIVL-NEXT: vzeroupper -; AVX512VBMIVL-NEXT: retq +; AVX512-LABEL: shuffle_v64i8_to_v8i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0 +; AVX512-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512-NEXT: vmovq %xmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <64 x i8>, <64 x i8>* %L %strided.vec = shufflevector <64 x i8> %vec, <64 x i8> undef, <8 x i32> store <8 x i8> %strided.vec, <8 x i8>* %S diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index e5285aebda69e3..b2c0acdf9b2287 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -4843,19 +4843,13 @@ define <32 x i8> @shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_ ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq ; -; AVX512VLBW-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512VLBW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512VLBW-NEXT: retq -; -; AVX512VLVBMI-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: -; AVX512VLVBMI: # %bb.0: -; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63] -; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0 -; AVX512VLVBMI-NEXT: retq +; AVX512VL-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlw $8, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512VL-NEXT: retq ; ; XOPAVX1-LABEL: shuffle_v32i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62: ; XOPAVX1: # %bb.0: diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll index 047978c8a0dac7..a540d04626ae83 100644 --- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll +++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll @@ -576,46 +576,41 @@ define <16 x i1> @interleaved_load_vf16_i8_stride4(<64 x i8>* %ptr) { ; ; AVX512-LABEL: interleaved_load_vf16_i8_stride4: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = -; AVX512-NEXT: vpshufb %xmm4, %xmm3, %xmm5 -; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm4 -; AVX512-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] -; AVX512-NEXT: vmovdqa (%rdi), %ymm5 -; AVX512-NEXT: vpmovdb %zmm5, %xmm5 -; 
AVX512-NEXT: vpblendd {{.*#+}} xmm8 = xmm5[0,1],xmm4[2,3]
+; AVX512-NEXT:    vmovdqa64 (%rdi), %zmm0
+; AVX512-NEXT:    vpmovdb %zmm0, %xmm8
+; AVX512-NEXT:    vmovdqa (%rdi), %xmm1
+; AVX512-NEXT:    vmovdqa 16(%rdi), %xmm2
+; AVX512-NEXT:    vmovdqa 32(%rdi), %xmm3
+; AVX512-NEXT:    vmovdqa 48(%rdi), %xmm4
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm5 =
-; AVX512-NEXT:    vpshufb %xmm5, %xmm3, %xmm6
-; AVX512-NEXT:    vpshufb %xmm5, %xmm2, %xmm5
+; AVX512-NEXT:    vpshufb %xmm5, %xmm4, %xmm6
+; AVX512-NEXT:    vpshufb %xmm5, %xmm3, %xmm5
 ; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm6 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512-NEXT:    vpshufb %xmm6, %xmm1, %xmm7
-; AVX512-NEXT:    vpshufb %xmm6, %xmm0, %xmm6
+; AVX512-NEXT:    vpshufb %xmm6, %xmm2, %xmm7
+; AVX512-NEXT:    vpshufb %xmm6, %xmm1, %xmm6
 ; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
 ; AVX512-NEXT:    vpblendd {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3]
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm6 =
-; AVX512-NEXT:    vpshufb %xmm6, %xmm3, %xmm7
-; AVX512-NEXT:    vpshufb %xmm6, %xmm2, %xmm6
+; AVX512-NEXT:    vpshufb %xmm6, %xmm4, %xmm7
+; AVX512-NEXT:    vpshufb %xmm6, %xmm3, %xmm6
 ; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm7 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512-NEXT:    vpshufb %xmm7, %xmm1, %xmm4
-; AVX512-NEXT:    vpshufb %xmm7, %xmm0, %xmm7
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
-; AVX512-NEXT:    vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,3]
+; AVX512-NEXT:    vpshufb %xmm7, %xmm2, %xmm0
+; AVX512-NEXT:    vpshufb %xmm7, %xmm1, %xmm7
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
+; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3]
 ; AVX512-NEXT:    vmovdqa {{.*#+}} xmm6 =
+; AVX512-NEXT:    vpshufb %xmm6, %xmm4, %xmm4
 ; AVX512-NEXT:    vpshufb %xmm6, %xmm3, %xmm3
-; AVX512-NEXT:    vpshufb %xmm6, %xmm2, %xmm2
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; AVX512-NEXT:    vmovdqa {{.*#+}} xmm3 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
-; AVX512-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
-; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX512-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; AVX512-NEXT:    vmovdqa {{.*#+}} xmm4 = <3,7,11,15,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512-NEXT:    vpshufb %xmm4, %xmm2, %xmm2
+; AVX512-NEXT:    vpshufb %xmm4, %xmm1, %xmm1
+; AVX512-NEXT:    vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; AVX512-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
 ; AVX512-NEXT:    vpcmpeqb %zmm5, %zmm8, %k0
-; AVX512-NEXT:    vpcmpeqb %zmm0, %zmm4, %k1
+; AVX512-NEXT:    vpcmpeqb %zmm1, %zmm0, %k1
 ; AVX512-NEXT:    kxnorw %k1, %k0, %k0
 ; AVX512-NEXT:    vpmovm2b %k0, %zmm0
 ; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0

From abd33bf5eff2419e0f49ce494039bceefe8e1085 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 18 Aug 2020 15:46:02 +0100
Subject: [PATCH 045/101] [X86][AVX] lowerShuffleWithPERMV - pad 128/256-bit
 shuffles on non-VLX targets

Allow non-VLX targets to use 512-bit VPERMV/VPERMV3 for 128/256-bit shuffles.

TBH I'm not sure these targets actually exist in the wild, but we're testing
for them and it's good test coverage for shuffle lowering/combines across
different subvector widths.
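
A sketch of the effect (hypothetical function, not one of the updated tests):
a two-source v16i16 shuffle whose mask crosses 128-bit lanes, e.g.

  define <16 x i16> @permv3_pad(<16 x i16> %a, <16 x i16> %b) {
    ; No in-lane unpack/blend/shift pattern covers this mask, so it falls
    ; through to the VPERMV3 path.
    %r = shufflevector <16 x i16> %a, <16 x i16> %b,
        <16 x i32> <i32 0, i32 31, i32 2, i32 29, i32 4, i32 27, i32 6, i32 25,
                    i32 8, i32 23, i32 10, i32 21, i32 12, i32 19, i32 14, i32 17>
    ret <16 x i16> %r
  }

can now be selected on AVX512BW targets without VLX by widening both sources
and the mask to 512 bits (upper halves undef), issuing a single VPERMT2W on
zmm registers, and extracting the low 256 bits, instead of being decomposed
into per-lane shuffles.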
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 59 ++++++++++------ .../X86/shuffle-strided-with-offset-512.ll | 67 ++++++------------- llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll | 30 ++++----- 3 files changed, 70 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0fbabdc5dfdf0b..ec4d236dc3ea19 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14969,17 +14969,35 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, Mask, Subtarget, DAG); } +// Lowers unary/binary shuffle as VPERMV/VPERMV3, for non-VLX targets, +// sub-512-bit shuffles are padded to 512-bits for the shuffle and then +// the active subvector is extracted. static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { + ArrayRef Mask, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); - SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true); + + MVT ShuffleVT = VT; + if (!VT.is512BitVector() && !Subtarget.hasVLX()) { + V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); + V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); + MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); + ShuffleVT = V1.getSimpleValueType(); + } + + SDValue Result; if (V2.isUndef()) - return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1); + Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1); + else + Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2); + + if (VT != ShuffleVT) + Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits()); - return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2); + return Result; } /// Generic lowering of v16i8 shuffles. @@ -15208,9 +15226,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Unpack; - // If we have VBMI we can use one VPERM instead of multiple PSHUFBs. - if (Subtarget.hasVBMI() && Subtarget.hasVLX()) - return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG); + // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8). + if (Subtarget.hasVBMI()) + return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget, + DAG); // If we have XOP we can use one VPPERM instead of multiple PSHUFBs. if (Subtarget.hasXOP()) { @@ -16964,9 +16983,9 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return PSHUFB; - // AVX512BWVL can lower to VPERMW. - if (Subtarget.hasBWI() && Subtarget.hasVLX()) - return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG); + // AVX512BW can lower to VPERMW (non-VLX will pad to v32i16). + if (Subtarget.hasBWI()) + return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. @@ -17069,9 +17088,9 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return PSHUFB; - // AVX512VBMIVL can lower to VPERMB. - if (Subtarget.hasVBMI() && Subtarget.hasVLX()) - return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG); + // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8). 
+ if (Subtarget.hasVBMI()) + return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. @@ -17325,7 +17344,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Blend; - return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG); } /// Handle lowering of 16-lane 32-bit floating point shuffles. @@ -17384,7 +17403,7 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef Mask, V1, V2, DAG, Subtarget)) return V; - return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG); } /// Handle lowering of 8-lane 64-bit integer shuffles. @@ -17447,7 +17466,7 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Blend; - return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG); } /// Handle lowering of 16-lane 32-bit integer shuffles. @@ -17524,7 +17543,7 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Blend; - return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG); } /// Handle lowering of 32-lane 16-bit integer shuffles. @@ -17587,7 +17606,7 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return PSHUFB; - return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG); } /// Handle lowering of 64-lane 8-bit integer shuffles. @@ -17643,7 +17662,7 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef Mask, // VBMI can use VPERMV/VPERMV3 byte shuffles. if (Subtarget.hasVBMI()) - return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG); // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. 
diff --git a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index 969ac375a70e33..40cd2fcd4fdeb7 100644 --- a/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -85,12 +85,10 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind ; ; AVX512BW-LABEL: shuffle_v32i16_to_v16i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %ymm1 -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u,18,19,22,23,26,27,30,31] -; AVX512BW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u] -; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] -; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,5,7,17,19,21,23,9,11,13,15,25,27,29,31] +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm1[0,2,1,3] ; AVX512BW-NEXT: vmovdqa %ymm0, (%rsi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -260,20 +258,11 @@ define void @shuffle_v32i16_to_v8i16_1(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_1: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <1,5,9,13,17,21,25,29,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_1: @@ -327,20 +316,11 @@ define void @shuffle_v32i16_to_v8i16_2(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [12,13,14,15,4,5,12,13,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [4,5,12,13,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <2,6,10,14,18,22,26,30,u,u,u,u,u,u,u,u> +; 
AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_2: @@ -394,20 +374,11 @@ define void @shuffle_v32i16_to_v8i16_3(<32 x i16>* %L, <8 x i16>* %S) nounwind { ; ; AVX512BW-LABEL: shuffle_v32i16_to_v8i16_3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX512BW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [12,13,14,15,6,7,14,15,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm4, %xmm3, %xmm3 -; AVX512BW-NEXT: vpshufb %xmm4, %xmm2, %xmm2 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [6,7,14,15,4,5,6,7,8,9,10,11,12,13,14,15] -; AVX512BW-NEXT: vpshufb %xmm3, %xmm1, %xmm1 -; AVX512BW-NEXT: vpshufb %xmm3, %xmm0, %xmm0 -; AVX512BW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] -; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi) +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <3,7,11,15,19,23,27,31,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqa (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: shuffle_v32i16_to_v8i16_3: diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll index de13135ebb5310..9e3c92aca5da3a 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll @@ -328,8 +328,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_61: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61] -; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -413,8 +413,8 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_ ; ; AVX512VBMI-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62] -; AVX512VBMI-NEXT: vpermb %zmm0, %zmm1, %zmm0 +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm1 = <1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,62,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vpermt2b %zmm0, %zmm1, %zmm0 ; AVX512VBMI-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI-NEXT: vzeroupper ; AVX512VBMI-NEXT: retq @@ -457,13 +457,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512BW-LABEL: PR34175: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512BW-NEXT: vmovdqu 32(%rdi), %xmm1 -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; AVX512BW-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = 
xmm0[0],xmm1[1],xmm0[2,3] -; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512BW-NEXT: vmovdqu (%rdi), %ymm1 +; AVX512BW-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512BW-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512BW-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -478,13 +475,10 @@ define <4 x double> @PR34175(<32 x i16>* %p) { ; ; AVX512VBMI-LABEL: PR34175: ; AVX512VBMI: # %bb.0: -; AVX512VBMI-NEXT: vmovdqu (%rdi), %xmm0 -; AVX512VBMI-NEXT: vmovdqu 32(%rdi), %xmm1 -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; AVX512VBMI-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] -; AVX512VBMI-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX512VBMI-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3] -; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512VBMI-NEXT: vmovdqa {{.*#+}} ymm0 = <0,8,16,24,u,u,u,u,u,u,u,u,u,u,u,u> +; AVX512VBMI-NEXT: vmovdqu (%rdi), %ymm1 +; AVX512VBMI-NEXT: vpermt2w %zmm0, %zmm0, %zmm1 +; AVX512VBMI-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX512VBMI-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX512VBMI-NEXT: retq ; From 8c9ffe34d932e2e17cbcf351d6e37783ea5453ae Mon Sep 17 00:00:00 2001 From: Nathan James Date: Tue, 18 Aug 2020 15:52:37 +0100 Subject: [PATCH 046/101] [NFC][clang-tidy] Put abseil headers in alphabetical order --- .../clang-tidy/abseil/AbseilMatcher.h | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h index f58ff5bc44b214..335c333573f43b 100644 --- a/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h +++ b/clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h @@ -47,14 +47,18 @@ AST_POLYMORPHIC_MATCHER( if (PrefixPosition == StringRef::npos) return false; Path = Path.drop_front(PrefixPosition + AbslPrefix.size()); - static const char *AbseilLibraries[] = { - "algorithm", "base", "container", "debugging", "flags", - "hash", "iterator", "memory", "meta", "numeric", - "random", "strings", "synchronization", "status", "time", - "types", "utility"}; - return std::any_of( - std::begin(AbseilLibraries), std::end(AbseilLibraries), - [&](const char *Library) { return Path.startswith(Library); }); + static const char *AbseilLibraries[] = {"algorithm", "base", + "container", "debugging", + "flags", "hash", + "iterator", "memory", + "meta", "numeric", + "random", "status", + "strings", "synchronization", + "time", "types", + "utility"}; + return llvm::any_of(AbseilLibraries, [&](const char *Library) { + return Path.startswith(Library); + }); } } // namespace ast_matchers From b8088ada05269819dbc95542ea125d074b451abf Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 18 Aug 2020 16:02:21 +0100 Subject: [PATCH 047/101] [LV] Predicated reduction tests. 
NFC --- .../ARM/mve-reduction-predselect.ll | 644 ++++++++++++++++++ .../LoopVectorize/reduction-predselect.ll | 305 +++++++++ 2 files changed, 949 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-predselect.ll diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll new file mode 100644 index 00000000000000..da5b5a60a400ca --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll @@ -0,0 +1,644 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -loop-vectorize -tail-predication=enabled -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-none-none-eabi" + +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 256) +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef) +; CHECK-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !2 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_sum( +; CHECK-NEXT: 
entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 256)
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[TMP5]], [[WIDE_MASKED_LOAD1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
+; CHECK: .lr.ph:
+; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !5
+; CHECK: ._crit_edge:
+; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
+;
+entry:
+  br label %.lr.ph
+
+.lr.ph: ; preds = %entry, %.lr.ph
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
+  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
+  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l3 = load i32, i32* %l2, align 4
+  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
+  %l5 = load i32, i32* %l4, align 4
+  %l7 = add i32 %sum.02, %indvars.iv
+  %l8 = add i32 %l7, %l3
+  %l9 = add i32 %l8, %l5
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph
+  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}
+
+define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+; CHECK-LABEL: @reduction_prod(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP5]] = mul <4 x i32> [[TMP4]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: br i1 false, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
+; CHECK: .lr.ph:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[L9:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[L2]], align 4
+; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L5:%.*]] = load i32, i32* [[L4]], align 4
+; CHECK-NEXT: [[L8:%.*]] = mul i32 [[PROD_02]], [[L3]]
+; CHECK-NEXT: [[L9]] = mul i32 [[L8]], [[L5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !7
+; CHECK: ._crit_edge:
+; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ [[L9]], [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]]
+;
+entry:
+  br label %.lr.ph
+
+.lr.ph: ; preds = %entry, %.lr.ph
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
+  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
+  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l3 = load i32, i32* %l2, align 4
+  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
+  %l5 = load i32, i32* %l4, align 4
+  %l8 = mul i32 %prod.02, %l3
+  %l9 = mul i32 %l8, %l5
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph
+  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
+  ret i32 %prod.0.lcssa
+}
+
+define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_and(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; 
CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = and <4 x i32> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ -1, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[L1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !9 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = and i32 %result.08, %l0 + %and = and i32 %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %and, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ 
[[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] +; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !11 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = xor <4 x i32> [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[L1]], [[L0]] +; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !13 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %xor, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fadd( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fadd fast <4 x float> [[TMP4]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDVARS_IV]] +; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[L0]] +; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[L1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !15 +; CHECK: for.end: +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %l0 + %fadd = fadd fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fadd, %for.body ] + ret float %result.0.lcssa +} + +define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fmul( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], 
label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP5]] = fmul fast <4 x float> [[TMP4]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP5]])
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L1:%.*]] = load float, float* [[ARRAYIDX2]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[L0]]
+; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[L1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !17
+; CHECK: for.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ [[FMUL]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret float [[RESULT_0_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
+  %l0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
+  %l1 = load float, float* %arrayidx2, align 4
+  %add = fmul fast float %result.08, %l0
+  %fmul = fmul fast float %add, %l1
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi float [ %fmul, %for.body ]
+  ret float %result.0.lcssa
+}
+
+define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_min(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[L0]]
+; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !19
+; CHECK: for.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l0 = load i32, i32* %arrayidx, align 4
+  %c0 = icmp slt i32 %result.08, %l0
+  %v0 = select i1 %c0, i32 %result.08, i32 %l0
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ %v0, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_max(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20
+; CHECK: middle.block:
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
+; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[L0]]
+; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !21
+; CHECK: for.end:
+; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l0 = load i32, i32* %arrayidx, align 4
+  %c0 = icmp ugt i32 %result.08, %l0
+  %v0 = select i1 %c0, i32 %result.08, i32 %l0
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ %v0, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+define float @reduction_fmax(float* nocapture %A, float* nocapture %B) {
+; CHECK-LABEL: @reduction_fmax(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ 1.000000e+03, [[ENTRY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i32 [[INDVARS_IV]]
+; CHECK-NEXT: [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[C0:%.*]] = fcmp ogt float [[RESULT_08]], [[L0]]
+; CHECK-NEXT: [[V0]] = select i1 [[C0]], float [[RESULT_08]], float [[L0]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 
[[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret float [[V0]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %v0, %for.body ], [ 1000.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %c0 = fcmp ogt float %result.08, %l0 + %v0 = select i1 %c0, float %result.08, float %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %v0, %for.body ] + ret float %result.0.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll new file mode 100644 index 00000000000000..aaae03b9fb3a32 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll @@ -0,0 +1,305 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilog -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP24:%.*]], %pred.load.continue6 ] +; CHECK: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: middle.block: +; CHECK: [[TMP26:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP27:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP26]]) +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { +; CHECK-LABEL: @reduction_sum( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP46:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND:%.*]] +; CHECK: [[TMP45:%.*]] = add <4 x i32> [[TMP44]], [[TMP23:%.*]] +; CHECK: [[TMP46]] = add <4 x i32> [[TMP45]], [[TMP43:%.*]] +; CHECK: middle.block: +; CHECK: [[TMP48:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP49:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP48]]) +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l3 = load i32, i32* %l2, align 
4
+  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
+  %l5 = load i32, i32* %l4, align 4
+  %l7 = add i32 %sum.02, %indvars.iv
+  %l8 = add i32 %l7, %l3
+  %l9 = add i32 %l8, %l5
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph
+  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
+  ret i32 %sum.0.lcssa
+}
+
+define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
+; CHECK-LABEL: @reduction_prod(
+; CHECK: vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ]
+; CHECK: [[TMP44:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
+; CHECK: [[TMP45]] = mul <4 x i32> [[TMP44]], [[TMP43:%.*]]
+; CHECK: middle.block:
+; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]])
+;
+entry:
+  br label %.lr.ph
+
+.lr.ph: ; preds = %entry, %.lr.ph
+  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
+  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
+  %l2 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l3 = load i32, i32* %l2, align 4
+  %l4 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
+  %l5 = load i32, i32* %l4, align 4
+  %l8 = mul i32 %prod.02, %l3
+  %l9 = mul i32 %l8, %l5
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph
+  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
+  ret i32 %prod.0.lcssa
+}
+
+define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_and(
+; CHECK: vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ]
+; CHECK: [[TMP44:%.*]] = and <4 x i32> [[VEC_PHI]], [[TMP42:%.*]]
+; CHECK: [[TMP45]] = and <4 x i32> [[TMP44]], [[TMP43]]
+; CHECK: middle.block:
+; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP47]])
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv
+  %l1 = load i32, i32* %arrayidx2, align 4
+  %add = and i32 %result.08, %l0
+  %and = and i32 %add, %l1
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ %and, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_or(
+; CHECK: vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ]
+; CHECK: [[TMP45]] = or <4 x i32> [[TMP44:%.*]], [[VEC_PHI]]
+; CHECK: middle.block:
+; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
+; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP47]])
+;
+entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %or = or i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %or, %for.body ] + ret i32 %result.0.lcssa +} + +define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { +; CHECK-LABEL: @reduction_xor( +; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP45]] = xor <4 x i32> [[TMP44:%.*]], [[VEC_PHI]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv + %l0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i32 %indvars.iv + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 + %xor = xor i32 %add, %result.08 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %xor, %for.body ] + ret i32 %result.0.lcssa +} + +define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fadd( +; CHECK: vector.body: +; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ] +; CHECK: [[TMP44:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]] +; CHECK: [[TMP45]] = fadd fast <4 x float> [[TMP44]], [[TMP43]] +; CHECK: middle.block: +; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]] +; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP47]]) +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv + %l0 = load float, float* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv + %l1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %l0 + %fadd = fadd fast float %add, %l1 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi float [ %fadd, %for.body ] + ret float %result.0.lcssa +} + +define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { +; CHECK-LABEL: @reduction_fmul( +; CHECK: 
vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %vector.ph ], [ [[TMP45:%.*]], %pred.load.continue14 ]
+; CHECK: [[TMP44:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP23:%.*]]
+; CHECK: [[TMP45]] = fmul fast <4 x float> [[TMP44]], [[TMP43]]
+; CHECK: middle.block:
+; CHECK: [[TMP47:%.*]] = select <4 x i1> [[TMP3:%.*]], <4 x float> [[TMP45]], <4 x float> [[VEC_PHI]]
+; CHECK: [[TMP48:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP47]])
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %A, i32 %indvars.iv
+  %l0 = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %B, i32 %indvars.iv
+  %l1 = load float, float* %arrayidx2, align 4
+  %add = fmul fast float %result.08, %l0
+  %fmul = fmul fast float %add, %l1
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi float [ %fmul, %for.body ]
+  ret float %result.0.lcssa
+}
+
+define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_min(
+; CHECK: vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, %vector.ph ], [ [[TMP25:%.*]], %pred.load.continue6 ]
+; CHECK: [[TMP24:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
+; CHECK: [[TMP25]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
+; CHECK: middle.block:
+; CHECK: [[TMP27:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
+; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP27]])
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l0 = load i32, i32* %arrayidx, align 4
+  %c0 = icmp slt i32 %result.08, %l0
+  %v0 = select i1 %c0, i32 %result.08, i32 %l0
+  %indvars.iv.next = add i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 257
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+  %result.0.lcssa = phi i32 [ %v0, %for.body ]
+  ret i32 %result.0.lcssa
+}
+
+define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
+; CHECK-LABEL: @reduction_max(
+; CHECK: vector.body:
+; CHECK: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, %vector.ph ], [ [[TMP25:%.*]], %pred.load.continue6 ]
+; CHECK: [[TMP24:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[TMP23:%.*]]
+; CHECK: [[TMP25]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
+; CHECK: middle.block:
+; CHECK: [[TMP27:%.*]] = select <4 x i1> [[TMP0:%.*]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
+; CHECK: [[TMP28:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP27]])
+;
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
+  %l0 = load i32, i32* %arrayidx, align 4
+  %c0 = icmp ugt i32 %result.08, %l0
+  %v0 = select i1 %c0, 
i32 %result.08, i32 %l0 + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 257 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + %result.0.lcssa = phi i32 [ %v0, %for.body ] + ret i32 %result.0.lcssa +} From 87122c3480e2115951045102bb26eedc200c8473 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 18 Aug 2020 16:08:15 +0100 Subject: [PATCH 048/101] [X86] Regenerate load-slice test labels. NFCI. Pulled out a superfluous diff from D66004 --- llvm/test/CodeGen/X86/load-slice.ll | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/X86/load-slice.ll b/llvm/test/CodeGen/X86/load-slice.ll index 3cbb70bd70d788..3bf95778f5647b 100644 --- a/llvm/test/CodeGen/X86/load-slice.ll +++ b/llvm/test/CodeGen/X86/load-slice.ll @@ -16,7 +16,7 @@ ; Low slice starts at 0 (base) and is 8-bytes aligned. ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. ; -; STRESS-LABEL: t1: +; STRESS-LABEL: _t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. ; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. @@ -31,7 +31,7 @@ ; STRESS-NEXT: vmovlps [[RES_Vec]], ([[BASE]]) ; ; Same for REGULAR, we eliminate register bank copy with each slices. -; REGULAR-LABEL: t1: +; REGULAR-LABEL: _t1: ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. ; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] ; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. @@ -90,14 +90,14 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) ; Low slice starts at 0 (base) and is 8-bytes aligned. ; High slice starts at 6 (base + 6-bytes) and is 2-bytes aligned. ; -; STRESS-LABEL: t2: +; STRESS-LABEL: _t2: ; STRESS: movzwl 6([[BASE:[^)]+]]), %eax ; STRESS-NEXT: addl ([[BASE]]), %eax ; STRESS-NEXT: ret ; ; For the REGULAR heuristic, this is not profitable to slice things that are not ; next to each other in memory. Here we have a hole with bytes #4-5. -; REGULAR-LABEL: t2: +; REGULAR-LABEL: _t2: ; REGULAR: shrq $48 define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) { %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start @@ -117,11 +117,11 @@ define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) { ; Second slice uses bytes numbered 6 and 7. ; Third slice uses bytes numbered 4 to 7. ; -; STRESS-LABEL: t3: +; STRESS-LABEL: _t3: ; STRESS: shrq $48 ; STRESS: shrq $32 ; -; REGULAR-LABEL: t3: +; REGULAR-LABEL: _t3: ; REGULAR: shrq $48 ; REGULAR: shrq $32 define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) { From a65a50540e3b5dd1938a1d14f31b912a311537fb Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Tue, 18 Aug 2020 08:16:25 -0700 Subject: [PATCH 049/101] [mlir][Linalg] Canonicalize tensor_reshape(splat-constant) -> splat-constant. When the operand to the linalg.tensor_reshape op is a splat constant, the result can be replaced with a splat constant of the same value but different type. 
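For example, with this change a reshape of a splat constant such as the
following (this is the pattern exercised by the tests added below):

```
%cst = constant dense<42.0> : tensor<2x8xf32>
%0 = linalg.tensor_reshape %cst
       [affine_map<(d0, d1, d2) -> (d0)>,
        affine_map<(d0, d1, d2) -> (d1, d2)>]
     : tensor<2x8xf32> into tensor<2x4x2xf32>
```

folds into a single splat constant of the result type:

```
%0 = constant dense<42.0> : tensor<2x4x2xf32>
```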
Differential Revision: https://reviews.llvm.org/D86117
---
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp   | 22 ++++++++-
 mlir/test/Dialect/Linalg/canonicalize.mlir | 57 ++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 009699be526321..308272d66d567a 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -18,6 +18,7 @@
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Function.h"
+#include "mlir/IR/Matchers.h"
 #include "mlir/IR/Module.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/PatternMatch.h"
@@ -734,9 +735,28 @@ static LogicalResult verify(TensorReshapeOp op) {
   return success();
 }
 
+/// Reshape of a splat constant can be replaced with a constant of the result
+/// type.
+struct FoldReshapeWithConstant : OpRewritePattern<TensorReshapeOp> {
+  using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
+                                PatternRewriter &rewriter) const override {
+    DenseElementsAttr attr;
+    if (!matchPattern(reshapeOp.src(), m_Constant(&attr)))
+      return failure();
+    if (!attr || !attr.isSplat())
+      return failure();
+    DenseElementsAttr newAttr = DenseElementsAttr::getFromRawBuffer(
+        reshapeOp.getResultType(), attr.getRawData(), true);
+    rewriter.replaceOpWithNewOp<ConstantOp>(reshapeOp, newAttr);
+    return success();
+  }
+};
+
 void TensorReshapeOp::getCanonicalizationPatterns(
     OwningRewritePatternList &results, MLIRContext *context) {
-  results.insert<CollapseReshapeOps<TensorReshapeOp>>(context);
+  results.insert<CollapseReshapeOps<TensorReshapeOp>, FoldReshapeWithConstant>(
+      context);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 005bd1c874458e..85321084cd0c68 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -203,3 +203,60 @@ func @dce_zero_memref(%arg0 : memref<0xf32>, %arg1: tensor<0xf32>) -> tensor<0xf
 // CHECK-NOT: linalg.copy
 // CHECK-NEXT: linalg.generic
 
+// -----
+
+func @reshape_splat_constant_int32() -> tensor<2x4x2xi32>
+{
+  %c0 = constant dense<42> : tensor<2x8xi32>
+  %0 = linalg.tensor_reshape %c0
+         [affine_map<(d0, d1, d2) -> (d0)>,
+          affine_map<(d0, d1, d2) -> (d1, d2)>]
+       : tensor<2x8xi32> into tensor<2x4x2xi32>
+  return %0 : tensor<2x4x2xi32>
+}
+// CHECK-LABEL: @reshape_splat_constant_int32
+// CHECK: %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xi32>
+// CHECK-NOT: linalg.tensor_reshape
+// CHECK: return %[[CST]]
+
+func @reshape_splat_constant_int16() -> tensor<2x4x2xi16>
+{
+  %c0 = constant dense<42> : tensor<2x8xi16>
+  %0 = linalg.tensor_reshape %c0
+         [affine_map<(d0, d1, d2) -> (d0)>,
+          affine_map<(d0, d1, d2) -> (d1, d2)>]
+       : tensor<2x8xi16> into tensor<2x4x2xi16>
+  return %0 : tensor<2x4x2xi16>
+}
+// CHECK-LABEL: @reshape_splat_constant_int16
+// CHECK: %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xi16>
+// CHECK-NOT: linalg.tensor_reshape
+// CHECK: return %[[CST]]
+
+func @reshape_splat_constant_float32() -> tensor<2x4x2xf32>
+{
+  %c0 = constant dense<42.0> : tensor<2x8xf32>
+  %0 = linalg.tensor_reshape %c0
+         [affine_map<(d0, d1, d2) -> (d0)>,
+          affine_map<(d0, d1, d2) -> (d1, d2)>]
+       : tensor<2x8xf32> into tensor<2x4x2xf32>
+  return %0 : tensor<2x4x2xf32>
+}
+// CHECK-LABEL: @reshape_splat_constant_float32
+// CHECK: %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xf32>
+// CHECK-NOT: linalg.tensor_reshape
+// CHECK: return %[[CST]]
+
+func @reshape_splat_constant_float64() -> tensor<2x4x2xf64>
+{
+  %c0 = constant dense<42.0> : tensor<2x8xf64>
+  %0 = linalg.tensor_reshape %c0
+         [affine_map<(d0, d1, d2) -> (d0)>,
+          affine_map<(d0, d1, d2) -> (d1, d2)>]
+       : tensor<2x8xf64> into tensor<2x4x2xf64>
+  return %0 : tensor<2x4x2xf64>
+}
+// CHECK-LABEL: @reshape_splat_constant_float64
+// CHECK: %[[CST:.*]] = constant dense<{{.*}}> : tensor<2x4x2xf64>
+// CHECK-NOT: linalg.tensor_reshape
+// CHECK: return %[[CST]]

From f48eced390dcda54766e1c510af10bbcbaebcd7e Mon Sep 17 00:00:00 2001
From: jasonliu
Date: Tue, 18 Aug 2020 14:18:53 +0000
Subject: [PATCH 050/101] [XCOFF] emit .rename for .lcomm when necessary

Summary:
This is a follow-up to D82481. For the .lcomm directive, although it is
not necessary to emit .rename, it is still desirable to do so, so that
the internal 'Rename..' name does not show up in the symbol table, the
naming is consistent between the TC entry and .lcomm, and the naming is
consistent between the IR and the final object file.

Reviewed By: hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D86075
---
 llvm/lib/MC/MCAsmStreamer.cpp                 |  6 ++
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp     |  2 +-
 .../PowerPC/aix-xcoff-symbol-rename.ll        | 90 ++++++++++++-------
 3 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index db0ed9a73d226f..490557a2db0878 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -797,6 +797,12 @@ void MCAsmStreamer::emitXCOFFLocalCommonSymbol(MCSymbol *LabelSym,
     OS << ',' << Log2_32(ByteAlignment);
 
   EmitEOL();
+
+  // Print symbol's rename (original name contains invalid character(s)) if
+  // there is one.
+  MCSymbolXCOFF *XSym = cast<MCSymbolXCOFF>(CsectSym);
+  if (XSym->hasRename())
+    emitXCOFFRenameDirective(XSym, XSym->getSymbolTableName());
 }
 
 void MCAsmStreamer::emitXCOFFSymbolLinkageWithVisibility(
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 920d6ae1f0d6cf..c7510ec05b2406 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1740,7 +1740,7 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
 
   if (GVKind.isBSSLocal())
     OutStreamer->emitXCOFFLocalCommonSymbol(
-        OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size,
+        OutContext.getOrCreateSymbol(GVSym->getSymbolTableName()), Size,
         GVSym, Alignment.value());
   else
     OutStreamer->emitCommonSymbol(GVSym, Size, Alignment.value());
diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
index 72502f925d29de..f486fc9524c3d3 100644
--- a/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-symbol-rename.ll
@@ -18,6 +18,9 @@
 ; This is f"o"
 @"f\22o\22" = common global i32 0, align 4
 
+; This is f=o
+@"f\3do" = internal global i32 0, align 4
+
 define internal i32 @f$o() {
 entry:
   %call = call i32 bitcast (i32 (...)* @"f\40o" to i32 ()*)()
@@ -27,8 +30,10 @@ entry:
 ; This is f&o
 define i32 @"f\26o"() {
 entry:
-  %call = call i32 @f$o()
-  ret i32 %call
+  %tmp = call i32 @f$o()
+  %tmp1 = load i32, i32* @"f\3do"
+  %tmp2 = add i32 %tmp, %tmp1
+  ret i32 %tmp2
 }
 
 ; This is f&_o
@@ -84,12 +89,17 @@ declare i32 @"f\40o"(...)
; ASM-NEXT: .vbyte 4, 10 # 0xa ; ASM-NEXT: .comm _Renamed..2222f_o_[RW],4,2 ; ASM-NEXT: .rename _Renamed..2222f_o_[RW],"f""o""" +; ASM-NEXT: .lcomm _Renamed..3df_o,4,_Renamed..3df_o[BS],2 +; ASM-NEXT: .rename _Renamed..3df_o[BS],"f=o" ; ASM-NEXT: .extern ._Renamed..40f_o[PR] ; ASM-NEXT: .rename ._Renamed..40f_o[PR],".f@o" ; ASM-NEXT: .extern _Renamed..40f_o[DS] ; ASM-NEXT: .rename _Renamed..40f_o[DS],"f@o" ; ASM-NEXT: .toc ; ASM-NEXT: L..C0: +; ASM-NEXT: .tc _Renamed..3df_o[TC],_Renamed..3df_o[BS] +; ASM-NEXT: .rename _Renamed..3df_o[TC],"f=o" +; ASM-NEXT: L..C1: ; ASM-NEXT: .tc _Renamed..40f_o[TC],_Renamed..40f_o[DS] ; ASM-NEXT: .rename _Renamed..40f_o[TC],"f@o" @@ -115,47 +125,59 @@ declare i32 @"f\40o"(...) ; OBJ-NEXT: 34: 90 01 00 08 stw 0, 8(1) ; OBJ-NEXT: 38: 94 21 ff c0 stwu 1, -64(1) ; OBJ-NEXT: 3c: 4b ff ff c5 bl 0x0 -; OBJ-NEXT: 40: 38 21 00 40 addi 1, 1, 64 -; OBJ-NEXT: 44: 80 01 00 08 lwz 0, 8(1) -; OBJ-NEXT: 48: 7c 08 03 a6 mtlr 0 -; OBJ-NEXT: 4c: 4e 80 00 20 blr +; OBJ-NEXT: 40: 80 82 00 00 lwz 4, 0(2) +; OBJ-NEXT: 00000042: R_TOC (idx: 24) f=o[TC] +; OBJ-NEXT: 44: 80 84 00 00 lwz 4, 0(4) +; OBJ-NEXT: 48: 7c 63 22 14 add 3, 3, 4 +; OBJ-NEXT: 4c: 38 21 00 40 addi 1, 1, 64 +; OBJ-NEXT: 50: 80 01 00 08 lwz 0, 8(1) +; OBJ-NEXT: 54: 7c 08 03 a6 mtlr 0 +; OBJ-NEXT: 58: 4e 80 00 20 blr +; OBJ-NEXT: 5c: 60 00 00 00 nop ; OBJ-EMPTY: -; OBJ-NEXT: 00000050 (idx: 10) .f&_o: -; OBJ-NEXT: 50: 80 62 00 00 lwz 3, 0(2) -; OBJ-NEXT: 00000052: R_TOC (idx: 24) f@o[TC] -; OBJ-NEXT: 54: 4e 80 00 20 blr +; OBJ-NEXT: 00000060 (idx: 10) .f&_o: +; OBJ-NEXT: 60: 80 62 00 04 lwz 3, 4(2) +; OBJ-NEXT: 00000062: R_TOC (idx: 26) f@o[TC] +; OBJ-NEXT: 64: 4e 80 00 20 blr ; OBJ-EMPTY: ; OBJ-NEXT: Disassembly of section .data: ; OBJ-EMPTY: -; OBJ-NEXT: 00000058 (idx: 14) f`o: -; OBJ-NEXT: 58: 00 00 00 0a +; OBJ-NEXT: 00000068 (idx: 14) f`o: +; OBJ-NEXT: 68: 00 00 00 0a ; OBJ-EMPTY: -; OBJ-NEXT: 0000005c (idx: 16) f$o[DS]: -; OBJ-NEXT: 5c: 00 00 00 00 -; OBJ-NEXT: 0000005c: R_POS (idx: 6) .f$o -; OBJ-NEXT: 60: 00 00 00 80 -; OBJ-NEXT: 00000060: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 64: 00 00 00 00 +; OBJ-NEXT: 0000006c (idx: 16) f$o[DS]: +; OBJ-NEXT: 6c: 00 00 00 00 +; OBJ-NEXT: 0000006c: R_POS (idx: 6) .f$o +; OBJ-NEXT: 70: 00 00 00 90 +; OBJ-NEXT: 00000070: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 74: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000068 (idx: 18) f&o[DS]: -; OBJ-NEXT: 68: 00 00 00 30 -; OBJ-NEXT: 00000068: R_POS (idx: 8) .f&o -; OBJ-NEXT: 6c: 00 00 00 80 -; OBJ-NEXT: 0000006c: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 70: 00 00 00 00 +; OBJ-NEXT: 00000078 (idx: 18) f&o[DS]: +; OBJ-NEXT: 78: 00 00 00 30 +; OBJ-NEXT: 00000078: R_POS (idx: 8) .f&o +; OBJ-NEXT: 7c: 00 00 00 90 +; OBJ-NEXT: 0000007c: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 80: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000074 (idx: 20) f&_o[DS]: -; OBJ-NEXT: 74: 00 00 00 50 -; OBJ-NEXT: 00000074: R_POS (idx: 10) .f&_o -; OBJ-NEXT: 78: 00 00 00 80 -; OBJ-NEXT: 00000078: R_POS (idx: 22) TOC[TC0] -; OBJ-NEXT: 7c: 00 00 00 00 +; OBJ-NEXT: 00000084 (idx: 20) f&_o[DS]: +; OBJ-NEXT: 84: 00 00 00 60 +; OBJ-NEXT: 00000084: R_POS (idx: 10) .f&_o +; OBJ-NEXT: 88: 00 00 00 90 +; OBJ-NEXT: 00000088: R_POS (idx: 22) TOC[TC0] +; OBJ-NEXT: 8c: 00 00 00 00 ; OBJ-EMPTY: -; OBJ-NEXT: 00000080 (idx: 24) f@o[TC]: -; OBJ-NEXT: 80: 00 00 00 00 -; OBJ-NEXT: 00000080: R_POS (idx: 2) f@o[DS] +; OBJ-NEXT: 00000090 (idx: 24) f=o[TC]: +; OBJ-NEXT: 90: 00 00 00 9c +; OBJ-NEXT: 00000090: R_POS (idx: 30) f=o[BS] +; OBJ-EMPTY: +; OBJ-NEXT: 00000094 (idx: 26) f@o[TC]: +; OBJ-NEXT: 
94: 00 00 00 00 +; OBJ-NEXT: 00000094: R_POS (idx: 2) f@o[DS] ; OBJ-EMPTY: ; OBJ-NEXT: Disassembly of section .bss: ; OBJ-EMPTY: -; OBJ-NEXT: 00000084 (idx: 26) f"o"[RW]: +; OBJ-NEXT: 00000098 (idx: 28) f"o"[RW]: +; OBJ-NEXT: ... +; OBJ-EMPTY: +; OBJ-NEXT: 0000009c (idx: 30) f=o[BS]: ; OBJ-NEXT: ... From 224a8c639eeb36b7a5ac6f8a50295f9ee2cb2518 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Mon, 17 Aug 2020 16:42:28 -0700 Subject: [PATCH 051/101] [GlobalISel][CallLowering] Look through call parameters for flags We weren't looking through the parameters on calls at all. E.g., say you had ``` declare i32 @zext(i32 zeroext %x) ... %y = call i32 @zext(i32 %something) ... ``` At the point of the call, we wouldn't know that the %something should have the zeroext attribute. This sets flags in about the same way as TargetLoweringBase::ArgListEntry::setAttributes. Differential Revision: https://reviews.llvm.org/D86125 --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 5 ++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 30 ++++++++- .../AArch64/GlobalISel/call-translator.ll | 36 +++++++++++ .../CodeGen/AArch64/GlobalISel/swifterror.ll | 64 +++++++++++++++++++ .../CodeGen/AArch64/GlobalISel/swiftself.ll | 11 ++++ .../GlobalISel/irtranslator-call-sret.ll | 17 +++-- 6 files changed, 155 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 38afed764f2939..1eec08f5106220 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -208,6 +208,11 @@ class CallLowering { return static_cast(TLI); } + /// \returns Flags corresponding to the attributes on the \p ArgIdx-th + /// parameter of \p Call. 
+ ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const; + template void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 661a8560a1c998..e443f603def6b3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -30,6 +30,34 @@ using namespace llvm; void CallLowering::anchor() {} +ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, + unsigned ArgIdx) const { + ISD::ArgFlagsTy Flags; + if (Call.paramHasAttr(ArgIdx, Attribute::SExt)) + Flags.setSExt(); + if (Call.paramHasAttr(ArgIdx, Attribute::ZExt)) + Flags.setZExt(); + if (Call.paramHasAttr(ArgIdx, Attribute::InReg)) + Flags.setInReg(); + if (Call.paramHasAttr(ArgIdx, Attribute::StructRet)) + Flags.setSRet(); + if (Call.paramHasAttr(ArgIdx, Attribute::Nest)) + Flags.setNest(); + if (Call.paramHasAttr(ArgIdx, Attribute::ByVal)) + Flags.setByVal(); + if (Call.paramHasAttr(ArgIdx, Attribute::Preallocated)) + Flags.setPreallocated(); + if (Call.paramHasAttr(ArgIdx, Attribute::InAlloca)) + Flags.setInAlloca(); + if (Call.paramHasAttr(ArgIdx, Attribute::Returned)) + Flags.setReturned(); + if (Call.paramHasAttr(ArgIdx, Attribute::SwiftSelf)) + Flags.setSwiftSelf(); + if (Call.paramHasAttr(ArgIdx, Attribute::SwiftError)) + Flags.setSwiftError(); + return Flags; +} + bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArrayRef ResRegs, ArrayRef> ArgRegs, @@ -44,7 +72,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, unsigned i = 0; unsigned NumFixedArgs = CB.getFunctionType()->getNumParams(); for (auto &Arg : CB.args()) { - ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, + ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); Info.OrigArgs.push_back(OrigArg); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll index ad38b2bb8b9c23..7eb21c21b86cc5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -151,6 +151,42 @@ define void @test_abi_exts_call(i8* %addr) { ret void } +; CHECK-LABEL: name: test_zext_in_callee +; CHECK: bb.1 (%ir-block.0): +; CHECK: liveins: $x0 +; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) +; CHECK: $w0 = COPY [[ZEXT]](s32) +; CHECK: BL @has_zext_param, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp +; CHECK: RET_ReallyLR +declare void @has_zext_param(i8 zeroext) +define void @test_zext_in_callee(i8* %addr) { + %val = load i8, i8* %addr + call void @has_zext_param(i8 %val) + ret void +} + +; CHECK-LABEL: name: test_sext_in_callee +; CHECK: bb.1 (%ir-block.0): +; CHECK: liveins: $x0 +; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 +; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load 1 from %ir.addr) +; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp +; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) +; CHECK: $w0 = COPY [[SEXT]](s32) +; 
CHECK: BL @has_sext_param, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0 +; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp +; CHECK: RET_ReallyLR +declare void @has_sext_param(i8 signext) +define void @test_sext_in_callee(i8* %addr) { + %val = load i8, i8* %addr + call void @has_sext_param(i8 %val) + ret void +} + ; CHECK-LABEL: name: test_abi_sext_ret ; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD ; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll index 4a3e5b04681476..a4a1747b05af9b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll @@ -513,3 +513,67 @@ a: %error = load %swift_error*, %swift_error** %error_ptr ret %swift_error* %error } + +; foo takes a swifterror parameter. We should be able to see that even when +; it isn't explicitly on the call. +define float @swifterror_param_not_on_call(i8* %error_ref) { +; CHECK-LABEL: swifterror_param_not_on_call: +; CHECK: mov [[ID:x[0-9]+]], x0 +; CHECK: bl {{.*}}foo +; CHECK: mov x0, x21 +; CHECK: cbnz x21 +; Access part of the error object and save it to error_ref +; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK: bl {{.*}}free + +entry: + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + %call = call float @foo(%swift_error** %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} + +; foo_sret takes an sret parameter and a swifterror parameter. We should be +; able to see that, even if it's not explicitly on the call. 
+define float @swifterror_param_not_on_call2(i8* %error_ref) { +; CHECK-LABEL: swifterror_param_not_on_call2: +; CHECK: mov [[ID:x[0-9]+]], x0 +; CHECK: mov [[ZERO:x[0-9]+]], xzr +; CHECK: bl {{.*}}foo_sret +; CHECK: mov x0, x21 +; CHECK: cbnz x21 +; Access part of the error object and save it to error_ref +; CHECK: ldrb [[CODE:w[0-9]+]], [x0, #8] +; CHECK: strb [[CODE]], [{{.*}}[[ID]]] +; CHECK: bl {{.*}}free + +entry: + %s = alloca %struct.S, align 8 + %error_ptr_ref = alloca swifterror %swift_error* + store %swift_error* null, %swift_error** %error_ptr_ref + call void @foo_sret(%struct.S* %s, i32 1, %swift_error** %error_ptr_ref) + %error_from_foo = load %swift_error*, %swift_error** %error_ptr_ref + %had_error_from_foo = icmp ne %swift_error* %error_from_foo, null + %tmp = bitcast %swift_error* %error_from_foo to i8* + br i1 %had_error_from_foo, label %handler, label %cont +cont: + %v1 = getelementptr inbounds %swift_error, %swift_error* %error_from_foo, i64 0, i32 1 + %t = load i8, i8* %v1 + store i8 %t, i8* %error_ref + br label %handler +handler: + call void @free(i8* %tmp) + ret float 1.0 +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll index 8ed06f23383c4f..0f090d488cf109 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/swiftself.ll @@ -60,3 +60,14 @@ entry: store i8* %3, i8** %0, align 8 ret void } + +; Check that x20 is used to pass a swiftself argument when the parameter is +; only in the declaration's arguments. +; CHECK-LABEL: _swiftself_not_on_call_params: +; CHECK: mov x20, x0 +; CHECK: bl {{_?}}swiftself_param +; CHECK: ret +define i8 *@swiftself_not_on_call_params(i8* %arg) { + %res = call i8 *@swiftself_param(i8* %arg) + ret i8 *%res +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index d53cfe688f53cf..f244a840476daf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -49,9 +49,12 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN: $vgpr1 = COPY [[FRAME_INDEX]](p5) - ; GCN: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; GCN: [[COPY21:%[0-9]+]]:_(p5) = COPY $sp_reg + ; GCN: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY21]], [[C6]](s32) + ; GCN: G_STORE [[FRAME_INDEX]](p5), [[PTR_ADD2]](p5) :: (store 4 into stack, align 16, addrspace 5) + ; GCN: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; GCN: $sgpr4_sgpr5 = COPY [[COPY11]](p4) ; GCN: $sgpr6_sgpr7 = COPY [[COPY12]](p4) ; GCN: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) @@ -60,11 +63,11 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN: $sgpr13 = COPY [[COPY16]](s32) ; GCN: $sgpr14 = COPY [[COPY17]](s32) ; GCN: $vgpr31 = COPY [[OR1]](s32) - ; GCN: $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit 
$sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
-  ; GCN:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
-  ; GCN:   [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
+  ; GCN:   $sgpr30_sgpr31 = SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+  ; GCN:   ADJCALLSTACKDOWN 0, 8, implicit-def $scc
+  ; GCN:   [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
   ; GCN:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load 1 from %ir.out.gep02, addrspace 5)
-  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
+  ; GCN:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load 4 from %ir.out.gep1, addrspace 5)
   ; GCN:   G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store 1 into `i8 addrspace(1)* undef`, addrspace 1)
   ; GCN:   G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
   ; GCN:   S_ENDPGM 0

From ec29538af2e0886a65f479d6a533956a1c478132 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Thu, 13 Aug 2020 09:00:26 -0700
Subject: [PATCH 052/101] [ELF] Assign file offsets of non-SHF_ALLOC after
 SHF_ALLOC and set sh_addr=0 to non-SHF_ALLOC

* GNU ld places non-SHF_ALLOC sections after SHF_ALLOC sections. This has the
  advantage that the file offsets of a non-SHF_ALLOC section can never be
  contained in a PT_LOAD segment. This patch matches that behavior.
* For non-SHF_ALLOC non-orphan sections, GNU ld may assign a non-zero sh_addr
  and treat them similarly to SHT_NOBITS (i.e. not advance the location
  counter). This is an alternative approach to what we have done in D85100.
  By placing non-SHF_ALLOC sections at the end, we can drop the special cases
  in createSection and findOrphanPos added by D85100.

Different from GNU ld, we set sh_addr to 0 for non-SHF_ALLOC sections. 0 is
arguably better because non-SHF_ALLOC sections don't appear in the memory
image. The ELF spec says:

> sh_addr - If the section will appear in the memory image of a process, this
> member gives the address at which the section's first byte should
> reside. Otherwise, the member contains 0.

D85100 appeared to take a detour. Taking a combined view of D85100 and this
patch, the overall complexity increases only slightly (one more 3-line loop)
while compatibility with GNU ld improves.

The behavior we don't want to match is the special treatment of .symtab
.shstrtab .strtab: they can be matched in LLD's linker scripts but not in
GNU ld's.
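To make the resulting layout concrete, here is a minimal sketch of the
two-pass file offset assignment this patch adopts (condensed from the
Writer.cpp hunk below; `outputSections`, `SHF_ALLOC` and `setFileOffset` are
the real LLD names, while the page-alignment handling of the last executable
PT_LOAD is omitted):

```cpp
// Pass 1: SHF_ALLOC sections receive contiguous file offsets, so any of
// them can be covered by a PT_LOAD segment.
for (OutputSection *sec : outputSections)
  if (sec->flags & SHF_ALLOC)
    off = setFileOffset(sec, off);
// Pass 2: non-SHF_ALLOC sections are appended afterwards; their offsets
// can never fall inside a PT_LOAD, and their sh_addr stays 0.
for (OutputSection *sec : outputSections)
  if (!(sec->flags & SHF_ALLOC))
    off = setFileOffset(sec, off);
```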
Reviewed By: jhenderson, psmith Differential Revision: https://reviews.llvm.org/D85867 --- lld/ELF/LinkerScript.cpp | 37 +++++--- lld/ELF/Writer.cpp | 13 +-- .../linkerscript/memory-region-alignment.test | 21 ++--- lld/test/ELF/linkerscript/sections-nonalloc.s | 90 +++++++++++++++++++ lld/test/ELF/linkerscript/sections.s | 39 +------- .../ELF/linkerscript/symbols-non-alloc.test | 7 +- 6 files changed, 138 insertions(+), 69 deletions(-) create mode 100644 lld/test/ELF/linkerscript/sections-nonalloc.s diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index a187aa1eb05a20..7e97576923c975 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -586,8 +586,6 @@ static OutputSection *findByName(ArrayRef vec, static OutputSection *createSection(InputSectionBase *isec, StringRef outsecName) { OutputSection *sec = script->createOutputSection(outsecName, ""); - if (!(isec->flags & SHF_ALLOC)) - sec->addrExpr = [] { return 0; }; sec->recordSection(isec); return sec; } @@ -852,21 +850,27 @@ static OutputSection *findFirstSection(PhdrEntry *load) { void LinkerScript::assignOffsets(OutputSection *sec) { const bool sameMemRegion = ctx->memRegion == sec->memRegion; const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; + const uint64_t savedDot = dot; ctx->memRegion = sec->memRegion; ctx->lmaRegion = sec->lmaRegion; - if (ctx->memRegion) - dot = ctx->memRegion->curPos; - - if (sec->addrExpr) - setDot(sec->addrExpr, sec->location, false); - // If the address of the section has been moved forward by an explicit - // expression so that it now starts past the current curPos of the enclosing - // region, we need to expand the current region to account for the space - // between the previous section, if any, and the start of this section. - if (ctx->memRegion && ctx->memRegion->curPos < dot) - expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, - ctx->memRegion->name, sec->name); + if (sec->flags & SHF_ALLOC) { + if (ctx->memRegion) + dot = ctx->memRegion->curPos; + if (sec->addrExpr) + setDot(sec->addrExpr, sec->location, false); + + // If the address of the section has been moved forward by an explicit + // expression so that it now starts past the current curPos of the enclosing + // region, we need to expand the current region to account for the space + // between the previous section, if any, and the start of this section. + if (ctx->memRegion && ctx->memRegion->curPos < dot) + expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, + ctx->memRegion->name, sec->name); + } else { + // Non-SHF_ALLOC sections have zero addresses. + dot = 0; + } switchTo(sec); @@ -918,6 +922,11 @@ void LinkerScript::assignOffsets(OutputSection *sec) { for (InputSection *sec : cast(base)->sections) output(sec); } + + // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections + // as they are not part of the process image. + if (!(sec->flags & SHF_ALLOC)) + dot = savedDot; } static bool isDiscardable(OutputSection &sec) { diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index cffdce0d6c310a..b26817b66e2711 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1234,13 +1234,7 @@ static bool shouldSkip(BaseCommand *cmd) { static std::vector::iterator findOrphanPos(std::vector::iterator b, std::vector::iterator e) { - // OutputSections without the SHF_ALLOC flag are not part of the memory image - // and their addresses usually don't matter. 
Place any orphan sections without - // the SHF_ALLOC flag at the end so that these do not affect the address - // assignment of OutputSections with the SHF_ALLOC flag. OutputSection *sec = cast(*e); - if (!(sec->flags & SHF_ALLOC)) - return e; // Find the first element that has as close a rank as possible. auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) { @@ -2589,7 +2583,11 @@ template void Writer::assignFileOffsets() { if (p->p_type == PT_LOAD && (p->p_flags & PF_X)) lastRX = p; + // Layout SHF_ALLOC sections before non-SHF_ALLOC sections. A non-SHF_ALLOC + // will not occupy file offsets contained by a PT_LOAD. for (OutputSection *sec : outputSections) { + if (!(sec->flags & SHF_ALLOC)) + continue; off = setFileOffset(sec, off); // If this is a last section of the last executable segment and that @@ -2599,6 +2597,9 @@ template void Writer::assignFileOffsets() { lastRX->lastSec == sec) off = alignTo(off, config->commonPageSize); } + for (OutputSection *sec : outputSections) + if (!(sec->flags & SHF_ALLOC)) + off = setFileOffset(sec, off); sectionHeaderOff = alignTo(off, config->wordsize); fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr); diff --git a/lld/test/ELF/linkerscript/memory-region-alignment.test b/lld/test/ELF/linkerscript/memory-region-alignment.test index f0540a7f11a789..ea858299a7ebac 100644 --- a/lld/test/ELF/linkerscript/memory-region-alignment.test +++ b/lld/test/ELF/linkerscript/memory-region-alignment.test @@ -1,5 +1,5 @@ # REQUIRES: x86 -# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"M",@progbits,1; .byte 0' > %t.s +# RUN: echo '.section .foo,"a"; .quad 0; .section .zed,"aM",@progbits,1; .byte 0' > %t.s # RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %t.s -o %t.o MEMORY { @@ -28,24 +28,25 @@ SECTIONS { # CHECK-NEXT: Offset: 0x1008 # CHECK-NEXT: Size: 8 -# CHECK: Name: .text +# CHECK: Name: .zed # CHECK-NEXT: Type: SHT_PROGBITS # CHECK-NEXT: Flags [ # CHECK-NEXT: SHF_ALLOC -# CHECK-NEXT: SHF_EXECINSTR +# CHECK-NEXT: SHF_MERGE # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x10 # CHECK-NEXT: Offset: 0x1010 -# CHECK-NEXT: Size: 0 +# CHECK-NEXT: Size: 1 -# CHECK: Name: .zed +# CHECK: Name: .text # CHECK-NEXT: Type: SHT_PROGBITS # CHECK-NEXT: Flags [ -# CHECK-NEXT: SHF_MERGE +# CHECK-NEXT: SHF_ALLOC +# CHECK-NEXT: SHF_EXECINSTR # CHECK-NEXT: ] -# CHECK-NEXT: Address: 0x10 -# CHECK-NEXT: Offset: 0x1010 -# CHECK-NEXT: Size: 1 +# CHECK-NEXT: Address: 0x14 +# CHECK-NEXT: Offset: 0x1014 +# CHECK-NEXT: Size: 0 # CHECK: Name: .comment # CHECK-NEXT: Type: SHT_PROGBITS @@ -54,5 +55,5 @@ SECTIONS { # CHECK-NEXT: SHF_STRINGS # CHECK-NEXT: ] # CHECK-NEXT: Address: 0x0 -# CHECK-NEXT: Offset: 0x1011 +# CHECK-NEXT: Offset: 0x1014 # CHECK-NEXT: Size: 8 diff --git a/lld/test/ELF/linkerscript/sections-nonalloc.s b/lld/test/ELF/linkerscript/sections-nonalloc.s new file mode 100644 index 00000000000000..a0669f701d8c90 --- /dev/null +++ b/lld/test/ELF/linkerscript/sections-nonalloc.s @@ -0,0 +1,90 @@ +# REQUIRES: x86 +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/main.s -o %t.o + +## Non-SHF_ALLOC sections are placed after all SHF_ALLOC sections. They will +## thus not be contained in a PT_LOAD segment. data2 has a PT_LOAD segment, +## even if it is preceded by a non-SHF_ALLOC section. Non-SHF_ALLOC orphan +## sections have zero addresses. +## NOTE: GNU ld assigns non-zero addresses to non-SHF_ALLOC non-orphan sections. 
+# RUN: ld.lld -T %t/a.lds %t.o -o %ta +# RUN: llvm-readelf -S -l %ta | FileCheck %s + +# CHECK: [Nr] Name Type Address Off Size ES Flg Lk +# CHECK-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 +# CHECK-NEXT: [ 1] .bss NOBITS 0000000000000000 001000 000001 00 WA 0 +# CHECK-NEXT: [ 2] data1 PROGBITS 0000000000000001 001001 000001 00 WA 0 +# CHECK-NEXT: [ 3] data3 PROGBITS 0000000000000002 001002 000001 00 WA 0 +# CHECK-NEXT: [ 4] other1 PROGBITS 0000000000000000 001008 000001 00 0 +# CHECK-NEXT: [ 5] other2 PROGBITS 0000000000000000 001010 000001 00 0 +## Orphan placement places other3, .symtab, .shstrtab and .strtab after other2. +# CHECK-NEXT: [ 6] other3 PROGBITS 0000000000000000 001020 000001 00 0 +# CHECK-NEXT: [ 7] .symtab SYMTAB 0000000000000000 001028 000030 18 9 +# CHECK-NEXT: [ 8] .shstrtab STRTAB 0000000000000000 001058 00004d 00 0 +# CHECK-NEXT: [ 9] .strtab STRTAB 0000000000000000 0010a5 000008 00 0 +# CHECK-NEXT: [10] data2 PROGBITS 0000000000000003 001003 000001 00 WA 0 +# CHECK-NEXT: [11] .text PROGBITS 0000000000000004 001004 000001 00 AX 0 + +# CHECK: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000004 0x000004 RW 0x1000 +# CHECK-NEXT: LOAD 0x001004 0x0000000000000004 0x0000000000000004 0x000001 0x000001 R E 0x1000 +# CHECK-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0 + +# RUN: ld.lld -T %t/b.lds %t.o -o %tb +# RUN: llvm-readelf -S -l %tb | FileCheck %s --check-prefix=CHECK1 + +# CHECK1: [Nr] Name Type Address Off Size ES Flg Lk +# CHECK1-NEXT: [ 0] NULL 0000000000000000 000000 000000 00 0 +# CHECK1-NEXT: [ 1] .text PROGBITS 00000000000000b0 0000b0 000001 00 AX 0 +# CHECK1-NEXT: [ 2] .bss NOBITS 00000000000000b1 0000b1 000001 00 WA 0 +# CHECK1-NEXT: [ 3] data1 PROGBITS 00000000000000b2 0000b2 000001 00 WA 0 +# CHECK1-NEXT: [ 4] data3 PROGBITS 00000000000000b3 0000b3 000001 00 WA 0 +# CHECK1-NEXT: [ 5] other1 PROGBITS 0000000000000000 0000b8 000001 00 0 +# CHECK1-NEXT: [ 6] other2 PROGBITS 0000000000000000 0000c0 000001 00 0 +# CHECK1-NEXT: [ 7] other3 PROGBITS 0000000000000000 0000d0 000001 00 0 +# CHECK1-NEXT: [ 8] .symtab SYMTAB 0000000000000000 0000d8 000030 18 10 +# CHECK1-NEXT: [ 9] .shstrtab STRTAB 0000000000000000 000108 00004d 00 0 +# CHECK1-NEXT: [10] .strtab STRTAB 0000000000000000 000155 000008 00 0 +# CHECK1-NEXT: [11] data2 PROGBITS 00000000000000b4 0000b4 000001 00 WA 0 +# CHECK1: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align +# CHECK1-NEXT: LOAD 0x000000 0x0000000000000000 0x0000000000000000 0x0000b5 0x0000b5 RWE 0x1000 +# CHECK1-NEXT: 0x60000000 0x0000b8 0x0000000000000000 0x0000000000000000 0x000009 0x000001 0x8 + +#--- a.lds +SECTIONS { + .bss : { *(.bss) } + data1 : { *(data1) } + other1 : { *(other1) } + other2 : { *(other2) } + data2 : { *(data2) } + .text : { *(.text) } + /DISCARD/ : { *(.comment) } +} + +#--- b.lds +PHDRS { + text PT_LOAD FILEHDR PHDRS; + foo 0x60000000 FLAGS (0); +} +SECTIONS { + . 
= SIZEOF_HEADERS; + .text : { *(.text) } : text + .bss : { *(.bss) } : text + data1 : { *(data1) } : text + other1 : { *(other1) } : foo + other2 : { *(other2) } : foo + data2 : { *(data1) } : text + /DISCARD/ : { *(.comment) } +} + +#--- main.s +.globl _start +_start: nop +.section data1,"aw"; .byte 0 +.section data2,"aw"; .byte 0 +.section data3,"aw"; .byte 0 +.bss; .byte 0 + +.section other1; .p2align 2; .byte 0 +.section other2; .p2align 3; .byte 0 +.section other3; .p2align 4; .byte 0 diff --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s index fa346406b743f8..539aa9c1705888 100644 --- a/lld/test/ELF/linkerscript/sections.s +++ b/lld/test/ELF/linkerscript/sections.s @@ -25,39 +25,6 @@ # SEC-DEFAULT: 7 .shstrtab 0000003b {{[0-9a-f]*}} # SEC-DEFAULT: 8 .strtab 00000008 {{[0-9a-f]*}} -## Sections are placed in the order specified by the linker script. .data has -## a PT_LOAD segment, even if it is preceded by a non-alloc section. To -## allow this, place non-alloc orphan sections at the end and advance -## location counters for non-alloc non-orphan sections. -# RUN: echo "SECTIONS { \ -# RUN: .bss : { *(.bss) } \ -# RUN: other : { *(other) } \ -# RUN: .shstrtab : { *(.shstrtab) } \ -# RUN: .symtab : { *(.symtab) } \ -# RUN: .strtab : { *(.strtab) } \ -# RUN: .data : { *(.data) } \ -# RUN: .text : { *(.text) } }" > %t3.lds -# RUN: ld.lld -o %t3a -T %t3.lds %t -# RUN: llvm-readelf -S -l %t3a | FileCheck --check-prefix=SEC-ORDER %s -# RUN: ld.lld -o %t3b -T %t3.lds --unique %t -# RUN: llvm-readelf -S -l %t3b | FileCheck --check-prefix=SEC-ORDER %s - -# SEC-ORDER: [Nr] Name Type Address Off Size ES Flg -# SEC-ORDER: [ 0] NULL 0000000000000000 000000 000000 00 -# SEC-ORDER-NEXT: [ 1] .bss NOBITS 0000000000000000 001000 000002 00 WA -# SEC-ORDER-NEXT: [ 2] other PROGBITS 0000000000000002 001002 000003 00 WA -# SEC-ORDER-NEXT: [ 3] .shstrtab STRTAB 0000000000000005 001005 00003b 00 -# SEC-ORDER-NEXT: [ 4] .symtab SYMTAB 0000000000000040 001040 000030 18 -# SEC-ORDER-NEXT: [ 5] .strtab STRTAB 0000000000000070 001070 000008 00 -# SEC-ORDER-NEXT: [ 6] .data PROGBITS 0000000000000078 001078 000020 00 WA -# SEC-ORDER-NEXT: [ 7] .text PROGBITS 0000000000000098 001098 00000e 00 AX -# SEC-ORDER-NEXT: [ 8] .comment PROGBITS 0000000000000000 0010a6 000008 01 MS - -# SEC-ORDER: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align -# SEC-ORDER-NEXT: LOAD 0x001000 0x0000000000000000 0x0000000000000000 0x000098 0x000098 RW 0x1000 -# SEC-ORDER-NEXT: LOAD 0x001098 0x0000000000000098 0x0000000000000098 0x00000e 0x00000e R E 0x1000 -# SEC-ORDER-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW 0 - # .text and .data have swapped names but proper sizes and types. 
# RUN: echo "SECTIONS { \
# RUN:       .data : { *(.text) } \
@@ -112,12 +79,12 @@
 # SEP-BY-NONALLOC:      [ 1] .text    PROGBITS 0000000000000000 001000 00000e 00 AX
 # SEP-BY-NONALLOC-NEXT: [ 2] .data    PROGBITS 000000000000000e 00100e 000020 00 WA
 # SEP-BY-NONALLOC-NEXT: [ 3] .bss     NOBITS   000000000000002e 00102e 000002 00 WA
-# SEP-BY-NONALLOC-NEXT: [ 4] .comment PROGBITS 0000000000000030 00102e 000008 01 MS
-# SEP-BY-NONALLOC-NEXT: [ 5] other    PROGBITS 0000000000000038 001038 000003 00 WA
+# SEP-BY-NONALLOC-NEXT: [ 4] .comment PROGBITS 0000000000000000 001033 000008 01 MS
+# SEP-BY-NONALLOC:      [ 8] other    PROGBITS 0000000000000030 001030 000003 00 WA

 # SEP-BY-NONALLOC:      Type      Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
 # SEP-BY-NONALLOC-NEXT: LOAD      0x001000 0x0000000000000000 0x0000000000000000 0x00000e 0x00000e R E 0x1000
-# SEP-BY-NONALLOC-NEXT: LOAD      0x00100e 0x000000000000000e 0x000000000000000e 0x00002d 0x00002d RW  0x1000
+# SEP-BY-NONALLOC-NEXT: LOAD      0x00100e 0x000000000000000e 0x000000000000000e 0x000025 0x000025 RW  0x1000
 # SEP-BY-NONALLOC-NEXT: GNU_STACK 0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0

 # Input section pattern contains additional semicolon.
diff --git a/lld/test/ELF/linkerscript/symbols-non-alloc.test b/lld/test/ELF/linkerscript/symbols-non-alloc.test
index 2bd6fc84df4678..ca47b2bfbcac68 100644
--- a/lld/test/ELF/linkerscript/symbols-non-alloc.test
+++ b/lld/test/ELF/linkerscript/symbols-non-alloc.test
@@ -1,6 +1,6 @@
 # REQUIRES: x86
 ## The address of a symbol assignment after a non-SHF_ALLOC section equals the
-## end address of the section.
+## end address of the last SHF_ALLOC section.

 # RUN: echo '.section .nonalloc,""; .quad 0' \
 # RUN:   | llvm-mc -filetype=obj -triple=x86_64-unknown-linux - -o %t
@@ -8,10 +8,11 @@
 # RUN: llvm-objdump --section-headers -t %t2 | FileCheck %s

 # CHECK: Sections:
-# CHECK: .nonalloc 00000008 0000000000000120
+# CHECK: .text     00000000 0000000000000120
+# CHECK: .nonalloc 00000008 0000000000000000

 # CHECK: SYMBOL TABLE:
-# CHECK: 0000000000000128 g .nonalloc 0000000000000000 Sym
+# CHECK: 0000000000000120 g .nonalloc 0000000000000000 Sym

 SECTIONS {
   . = SIZEOF_HEADERS;

From 645c6856a68af9b9dd7d918f630560cf07462ed7 Mon Sep 17 00:00:00 2001
From: Jamie Schmeiser
Date: Tue, 18 Aug 2020 16:05:20 +0000
Subject: [PATCH 053/101] [NFC] Add raw_ostream parameter to printIR routines

This is a non-functional change that generalizes the printIR routines so that
the output can be saved and manipulated rather than being written directly to
dbgs(). It is a prerequisite for several upcoming changes that add new ways
of examining changes made to the IR in the new pass manager.
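To illustrate what the extra parameter enables (an illustration only, not
part of this patch): with a raw_ostream sink, a later consumer can capture a
dump in memory using LLVM's raw_string_ostream instead of writing straight to
dbgs(). A minimal sketch against the generalized helper below:

```cpp
#include "llvm/Support/raw_ostream.h"
#include <string>

// Capture the textual IR rather than printing it immediately.
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
printIR(OS, F, "*** IR Dump Before Pass ***");
OS.flush();
// Buffer can now be stored and diffed against a dump taken after the pass.
```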
Reviewed By: aeubanks (Arthur Eubanks) Differential Revision: https://reviews.llvm.org/D85999 --- llvm/lib/Passes/StandardInstrumentations.cpp | 61 ++++++++++---------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 9e9caa1557b406..55dbca71437138 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -86,39 +86,39 @@ Optional> unwrapModule(Any IR) { llvm_unreachable("Unknown IR unit"); } -void printIR(const Function *F, StringRef Banner, StringRef Extra = StringRef(), - bool Brief = false) { +void printIR(raw_ostream &OS, const Function *F, StringRef Banner, + StringRef Extra = StringRef(), bool Brief = false) { if (Brief) { - dbgs() << F->getName() << '\n'; + OS << F->getName() << '\n'; return; } if (!llvm::isFunctionInPrintList(F->getName())) return; - dbgs() << Banner << Extra << "\n" << static_cast(*F); + OS << Banner << Extra << "\n" << static_cast(*F); } -void printIR(const Module *M, StringRef Banner, StringRef Extra = StringRef(), - bool Brief = false) { +void printIR(raw_ostream &OS, const Module *M, StringRef Banner, + StringRef Extra = StringRef(), bool Brief = false) { if (Brief) { - dbgs() << M->getName() << '\n'; + OS << M->getName() << '\n'; return; } if (llvm::isFunctionInPrintList("*") || llvm::forcePrintModuleIR()) { - dbgs() << Banner << Extra << "\n"; - M->print(dbgs(), nullptr, false); + OS << Banner << Extra << "\n"; + M->print(OS, nullptr, false); } else { for (const auto &F : M->functions()) { - printIR(&F, Banner, Extra); + printIR(OS, &F, Banner, Extra); } } } -void printIR(const LazyCallGraph::SCC *C, StringRef Banner, +void printIR(raw_ostream &OS, const LazyCallGraph::SCC *C, StringRef Banner, StringRef Extra = StringRef(), bool Brief = false) { if (Brief) { - dbgs() << *C << '\n'; + OS << *C << '\n'; return; } @@ -127,47 +127,48 @@ void printIR(const LazyCallGraph::SCC *C, StringRef Banner, const Function &F = N.getFunction(); if (!F.isDeclaration() && llvm::isFunctionInPrintList(F.getName())) { if (!BannerPrinted) { - dbgs() << Banner << Extra << "\n"; + OS << Banner << Extra << "\n"; BannerPrinted = true; } - F.print(dbgs()); + F.print(OS); } } } -void printIR(const Loop *L, StringRef Banner, bool Brief = false) { +void printIR(raw_ostream &OS, const Loop *L, StringRef Banner, + bool Brief = false) { if (Brief) { - dbgs() << *L; + OS << *L; return; } const Function *F = L->getHeader()->getParent(); if (!llvm::isFunctionInPrintList(F->getName())) return; - llvm::printLoop(const_cast(*L), dbgs(), std::string(Banner)); + llvm::printLoop(const_cast(*L), OS, std::string(Banner)); } /// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into /// llvm::Any and does actual print job. 
-void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false, - bool Brief = false) { +void unwrapAndPrint(raw_ostream &OS, Any IR, StringRef Banner, + bool ForceModule = false, bool Brief = false) { if (ForceModule) { if (auto UnwrappedModule = unwrapModule(IR)) - printIR(UnwrappedModule->first, Banner, UnwrappedModule->second); + printIR(OS, UnwrappedModule->first, Banner, UnwrappedModule->second); return; } if (any_isa(IR)) { const Module *M = any_cast(IR); assert(M && "module should be valid for printing"); - printIR(M, Banner, "", Brief); + printIR(OS, M, Banner, "", Brief); return; } if (any_isa(IR)) { const Function *F = any_cast(IR); assert(F && "function should be valid for printing"); - printIR(F, Banner, "", Brief); + printIR(OS, F, Banner, "", Brief); return; } @@ -175,14 +176,14 @@ void unwrapAndPrint(Any IR, StringRef Banner, bool ForceModule = false, const LazyCallGraph::SCC *C = any_cast(IR); assert(C && "scc should be valid for printing"); std::string Extra = std::string(formatv(" (scc: {0})", C->getName())); - printIR(C, Banner, Extra, Brief); + printIR(OS, C, Banner, Extra, Brief); return; } if (any_isa(IR)) { const Loop *L = any_cast(IR); assert(L && "Loop should be valid for printing"); - printIR(L, Banner, Brief); + printIR(OS, L, Banner, Brief); return; } llvm_unreachable("Unknown wrapped IR type"); @@ -226,7 +227,7 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) { return; SmallString<20> Banner = formatv("*** IR Dump Before {0} ***", PassID); - unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR()); + unwrapAndPrint(dbgs(), IR, Banner, llvm::forcePrintModuleIR()); return; } @@ -241,7 +242,7 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) { popModuleDesc(PassID); SmallString<20> Banner = formatv("*** IR Dump After {0} ***", PassID); - unwrapAndPrint(IR, Banner, llvm::forcePrintModuleIR()); + unwrapAndPrint(dbgs(), IR, Banner, llvm::forcePrintModuleIR()); } void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) { @@ -262,7 +263,7 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) { SmallString<20> Banner = formatv("*** IR Dump After {0} *** invalidated: ", PassID); - printIR(M, Banner, Extra); + printIR(dbgs(), M, Banner, Extra); } void PrintIRInstrumentation::registerCallbacks( @@ -315,7 +316,7 @@ void PrintPassInstrumentation::registerCallbacks( "Unexpectedly skipping special pass"); dbgs() << "Skipping pass: " << PassID << " on "; - unwrapAndPrint(IR, "", false, true); + unwrapAndPrint(dbgs(), IR, "", false, true); }); PIC.registerBeforeNonSkippedPassCallback( @@ -324,12 +325,12 @@ void PrintPassInstrumentation::registerCallbacks( return; dbgs() << "Running pass: " << PassID << " on "; - unwrapAndPrint(IR, "", false, true); + unwrapAndPrint(dbgs(), IR, "", false, true); }); PIC.registerBeforeAnalysisCallback([](StringRef PassID, Any IR) { dbgs() << "Running analysis: " << PassID << " on "; - unwrapAndPrint(IR, "", false, true); + unwrapAndPrint(dbgs(), IR, "", false, true); }); } From aa48a480b89ab969448851ee888357d42ee7761e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 18 Aug 2020 09:07:38 -0700 Subject: [PATCH 054/101] [llvm-dwarfdump][test] Add a --statistics test for a DW_AT_artificial variable There is an untested but useful case: `this` (even if not written) is counted as a source variable. 
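For reference, the C++ shape that exercises this case is a member function
with no written parameters; a sketch mirroring the comment block added to the
test below:

```cpp
struct T {
  void empty();
};
// No parameter appears in the source, yet the DWARF for T::empty() carries
// an artificial `this` (DW_AT_artificial) that the statistics must count.
void T::empty() {}
```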
Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D86044 --- .../tools/llvm-dwarfdump/X86/statistics.ll | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll index 589375ac6f55b2..bd717dfc85b388 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll +++ b/llvm/test/tools/llvm-dwarfdump/X86/statistics.ll @@ -23,6 +23,11 @@ ; ; int boo(int, int) {} +; struct T { +; void empty(); +; }; +; void T::empty() {} + ; Following variables/arguments/members should be counted: ; - GlobalConst, ; - Global, @@ -30,16 +35,17 @@ ; - square::i, ; - cube::i, cube::squared ; - boo::1, boo::2 +; - this in T::empty() ; Skipped entities: ; - declaration of test::a, ; - non-constant member S:fn, ; - arguments of S:fn. -; CHECK: "#unique source variables":9 +; CHECK: "#unique source variables":10 ; +1 extra inline i. -; CHECK: "#source variables":10 +; CHECK: "#source variables":11 ; -1 square::i -; CHECK: "#source variables with location":9 +; CHECK: "#source variables with location":10 ; CHECK: "sum_all_local_vars(#bytes in parent scope)":[[BYTES:[0-9]+]] ; Because of the dbg.value in the middle of the function, the pc range coverage ; must be below 100%. @@ -48,11 +54,11 @@ ; CHECK: "sum_all_local_vars(#bytes in parent scope covered by DW_AT_location)": ; CHECK: "#bytes witin functions":[[FUNCSIZE:[0-9]+]] ; CHECK: "#bytes witin inlined functions":[[INLINESIZE:[0-9]+]] -; CHECK: "#bytes in __debug_info":380 +; CHECK: "#bytes in __debug_info":459 ; CHECK: "#bytes in __debug_loc":35 -; CHECK: "#bytes in __debug_abbrev":303 -; CHECK: "#bytes in __debug_line":117 -; CHECK: "#bytes in __debug_str":204 +; CHECK: "#bytes in __debug_abbrev":384 +; CHECK: "#bytes in __debug_line":126 +; CHECK: "#bytes in __debug_str":231 ; ModuleID = '/tmp/quality.cpp' source_filename = "/tmp/quality.cpp" @@ -118,6 +124,17 @@ entry: ret i32 0, !dbg !58 } +%struct.T = type { i8 } + +define void @_ZN1T5emptyEv(%struct.T* %this) #2 !dbg !59 { +entry: + %this.addr = alloca %struct.T*, align 8 + store %struct.T* %this, %struct.T** %this.addr, align 8 + call void @llvm.dbg.declare(metadata %struct.T** %this.addr, metadata !67, metadata !DIExpression()), !dbg !69 + %this1 = load %struct.T*, %struct.T** %this.addr, align 8 + ret void, !dbg !70 +} + attributes #0 = { alwaysinline nounwind ssp uwtable } attributes #1 = { nounwind readnone speculatable } attributes #2 = { noinline nounwind optnone ssp uwtable } @@ -185,3 +202,16 @@ attributes #2 = { noinline nounwind optnone ssp uwtable } !56 = !DILocation(line: 10, column: 12, scope: !52) !57 = !DILocalVariable(arg: 2, scope: !52, file: !3, line: 10, type: !8) !58 = !DILocation(line: 10, column: 17, scope: !52) + +!59 = distinct !DISubprogram(name: "empty", linkageName: "_ZN1T5emptyEv", scope: !60, file: !3, line: 25, type: !63, scopeLine: 25, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !62, retainedNodes: !4) +!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "T", file: !3, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !61, identifier: "_ZTS1T") +!61 = !{!62} +!62 = !DISubprogram(name: "empty", linkageName: "_ZN1T5emptyEv", scope: !60, file: !3, line: 23, type: !63, scopeLine: 23, flags: DIFlagPrototyped, spFlags: 0) +!63 = !DISubroutineType(types: !64) +!64 = !{!65, !66} +!65 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!66 = !DIDerivedType(tag: 
DW_TAG_pointer_type, baseType: !60, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer) +!67 = !DILocalVariable(name: "this", arg: 1, scope: !59, type: !68, flags: DIFlagArtificial | DIFlagObjectPointer) +!68 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !60, size: 64) +!69 = !DILocation(line: 0, scope: !59) +!70 = !DILocation(line: 25, column: 19, scope: !59) From bb54bcf84970c04c9748004f3a4cf59b0c1832a7 Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Wed, 5 Aug 2020 23:12:19 +0000 Subject: [PATCH 055/101] [libFuzzer] Fix arguments of InsertPartOf/CopyPartOf calls in CrossOver mutator. The CrossOver mutator is meant to cross over two given buffers (referred to as the first/second buffer henceforth). Previously InsertPartOf/CopyPartOf calls used in the CrossOver mutator incorrectly inserted/copied part of the second buffer into a "scratch buffer" (MutateInPlaceHere of the size CurrentMaxMutationLen), rather than the first buffer. This is not intended behavior, because the scratch buffer does not always (i) contain the content of the first buffer, and (ii) have the same size as the first buffer; CurrentMaxMutationLen is typically a lot larger than the size of the first buffer. This patch fixes the issue by using the first buffer instead of the scratch buffer in InsertPartOf/CopyPartOf calls. A FuzzBench experiment was run to make sure that this change does not inadvertently degrade the performance. The performance is largely the same; more details can be found at: https://storage.googleapis.com/fuzzer-test-suite-public/fixcrossover-report/index.html This patch also adds two new tests, namely "cross_over_insert" and "cross_over_copy", which specifically target InsertPartOf and CopyPartOf, respectively. - cross_over_insert.test checks if the fuzzer can use InsertPartOf to trigger the crash. - cross_over_copy.test checks if the fuzzer can use CopyPartOf to trigger the crash. These newly added tests were designed to pass with the current patch, but not without the it (with 790878f291fa5dc58a1c560cb6cc76fd1bfd1c5a these tests do not pass). To achieve this, -max_len was intentionally given a high value. Without this patch, InsertPartOf/CopyPartOf will generate larger inputs, possibly with unpredictable data in it, thereby failing to trigger the crash. The test pass condition for these new tests is narrowed down by (i) limiting mutation depth to 1 (i.e., a single CrossOver mutation should be able to trigger the crash) and (ii) checking whether the mutation sequence of "CrossOver-" leads to the crash. Also note that these newly added tests and an existing test (cross_over.test) all use "-reduce_inputs=0" flags to prevent reducing inputs; it's easier to force the fuzzer to keep original input string this way than tweaking cov-instrumented basic blocks in the source code of the fuzzer executable. 
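As a rough sketch of the intended semantics (paraphrased, not libFuzzer's
code; the offsets are fixed here, whereas libFuzzer chooses them at random):
a CopyPartOf-style cross over must overwrite a region of the first input in
place, which is exactly what cross_over_copy.test checks:

```cpp
#include <cstddef>
#include <cstdio>
#include <cstring>

// Copy N bytes from `From` (at FromOff) over `To` (at ToOff); the size of
// `To` is unchanged.
static void copyPartOf(const char *From, std::size_t FromOff, char *To,
                       std::size_t ToOff, std::size_t N) {
  std::memcpy(To + ToOff, From + FromOff, N);
}

int main() {
  char First[] = "ABCDE00HIJ";        // corpus input A of cross_over_copy.test
  const char Second[] = "ZFG";        // corpus input B
  copyPartOf(Second, 1, First, 5, 2); // copy "FG" over the "00"
  std::printf("%s\n", First);         // prints ABCDEFGHIJ, the crashing input
}
```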
Differential Revision: https://reviews.llvm.org/D85554 --- compiler-rt/lib/fuzzer/FuzzerMutate.cpp | 14 ++++++------- compiler-rt/test/fuzzer/CrossOverTest.cpp | 15 +++++++------- compiler-rt/test/fuzzer/cross_over.test | 4 ++-- compiler-rt/test/fuzzer/cross_over_copy.test | 20 +++++++++++++++++++ .../test/fuzzer/cross_over_insert.test | 20 +++++++++++++++++++ 5 files changed, 56 insertions(+), 17 deletions(-) create mode 100644 compiler-rt/test/fuzzer/cross_over_copy.test create mode 100644 compiler-rt/test/fuzzer/cross_over_insert.test diff --git a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp index 29541eac5dc60b..df9ada45bb0391 100644 --- a/compiler-rt/lib/fuzzer/FuzzerMutate.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerMutate.cpp @@ -425,26 +425,26 @@ size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, if (!CrossOverWith) return 0; const Unit &O = *CrossOverWith; if (O.empty()) return 0; - MutateInPlaceHere.resize(MaxSize); - auto &U = MutateInPlaceHere; size_t NewSize = 0; switch(Rand(3)) { case 0: - NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size()); + MutateInPlaceHere.resize(MaxSize); + NewSize = CrossOver(Data, Size, O.data(), O.size(), + MutateInPlaceHere.data(), MaxSize); + memcpy(Data, MutateInPlaceHere.data(), NewSize); break; case 1: - NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize); + NewSize = InsertPartOf(O.data(), O.size(), Data, Size, MaxSize); if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); break; case 2: - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); + NewSize = CopyPartOf(O.data(), O.size(), Data, Size); break; default: assert(0); } assert(NewSize > 0 && "CrossOver returned empty unit"); assert(NewSize <= MaxSize && "CrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); return NewSize; } diff --git a/compiler-rt/test/fuzzer/CrossOverTest.cpp b/compiler-rt/test/fuzzer/CrossOverTest.cpp index a7643570a92b25..b4506f665dc762 100644 --- a/compiler-rt/test/fuzzer/CrossOverTest.cpp +++ b/compiler-rt/test/fuzzer/CrossOverTest.cpp @@ -4,10 +4,11 @@ // Test for a fuzzer. 
The fuzzer must find the string // ABCDEFGHIJ -// We use it as a test for CrossOver functionality -// by passing two inputs to it: -// ABCDE00000 -// ZZZZZFGHIJ +// We use it as a test for each of CrossOver functionalities +// by passing the following sets of two inputs to it: +// {ABCDE00000, ZZZZZFGHIJ} +// {ABCDEHIJ, ZFG} to specifically test InsertPartOf +// {ABCDE00HIJ, ZFG} to specifically test CopyPartOf // #include #include @@ -42,13 +43,11 @@ static const uint32_t ExpectedHash = 0xe1677acb; extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // fprintf(stderr, "ExpectedHash: %x\n", ExpectedHash); - if (Size != 10) return 0; + if (Size == 10 && ExpectedHash == simple_hash(Data, Size)) + *NullPtr = 0; if (*Data == 'A') Sink++; if (*Data == 'Z') Sink--; - if (ExpectedHash == simple_hash(Data, Size)) - *NullPtr = 0; return 0; } - diff --git a/compiler-rt/test/fuzzer/cross_over.test b/compiler-rt/test/fuzzer/cross_over.test index 058b5eb2c85cd0..64e06e8cd3667b 100644 --- a/compiler-rt/test/fuzzer/cross_over.test +++ b/compiler-rt/test/fuzzer/cross_over.test @@ -12,7 +12,7 @@ RUN: echo -n ABCDE00000 > %t-corpus/A RUN: echo -n ZZZZZFGHIJ > %t-corpus/B -RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 %t-corpus +RUN: not %run %t-CrossOverTest -max_len=10 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus # Test the same thing but using -seed_inputs instead of passing the corpus dir. -RUN: not %run %t-CrossOverTest -max_len=10 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B +RUN: not %run %t-CrossOverTest -max_len=10 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B diff --git a/compiler-rt/test/fuzzer/cross_over_copy.test b/compiler-rt/test/fuzzer/cross_over_copy.test new file mode 100644 index 00000000000000..24b2f9b3b11325 --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_copy.test @@ -0,0 +1,20 @@ +# Tests CrossOver CopyPartOf. +# We want to make sure that the test can find the input +# ABCDEFGHIJ when given two other inputs in the seed corpus: +# ABCDE00HIJ and +# (Z) FG +# +RUN: %cpp_compiler %S/CrossOverTest.cpp -o %t-CrossOverTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n ABCDE00HIJ > %t-corpus/A +RUN: echo -n ZFG > %t-corpus/B + + +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus 2>&1 | FileCheck %s + +# Test the same thing but using -seed_inputs instead of passing the corpus dir. +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B 2>&1 | FileCheck %s + +CHECK: MS: 1 CrossOver- diff --git a/compiler-rt/test/fuzzer/cross_over_insert.test b/compiler-rt/test/fuzzer/cross_over_insert.test new file mode 100644 index 00000000000000..cb7d4fab81ef7e --- /dev/null +++ b/compiler-rt/test/fuzzer/cross_over_insert.test @@ -0,0 +1,20 @@ +# Tests CrossOver InsertPartOf. +# We want to make sure that the test can find the input +# ABCDEFGHIJ when given two other inputs in the seed corpus: +# ABCDE HIJ and +# (Z) FG +# +RUN: %cpp_compiler %S/CrossOverTest.cpp -o %t-CrossOverTest + +RUN: rm -rf %t-corpus +RUN: mkdir %t-corpus +RUN: echo -n ABCDEHIJ > %t-corpus/A +RUN: echo -n ZFG > %t-corpus/B + + +RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 %t-corpus 2>&1 | FileCheck %s + +# Test the same thing but using -seed_inputs instead of passing the corpus dir. 
+RUN: not %run %t-CrossOverTest -mutate_depth=1 -max_len=1024 -reduce_inputs=0 -seed=1 -runs=10000000 -seed_inputs=%t-corpus/A,%t-corpus/B 2>&1 | FileCheck %s
+
+CHECK: MS: 1 CrossOver-

From cc98a0fbe46511ebcbca5600c9ec49901469ae3d Mon Sep 17 00:00:00 2001
From: George Mitenkov
Date: Tue, 18 Aug 2020 18:42:23 +0300
Subject: [PATCH 056/101] [MLIR][SPIRVToLLVM] Additional conversions for
 spirv-runner

This patch adds more op/type conversion support necessary for `spirv-runner`:
- EntryPoint/ExecutionMode: currently removed, since we assume the kernel
  module contains only one kernel function.
- The StorageBuffer storage class is now supported. We are not concerned with
  multithreading, so this is fine for now.
- Type conversion is enhanced: regular offsets and strides for structs and
  arrays are now supported (based on `VulkanLayoutUtils`).
- Support for `spv.AccessChain`, which is modelled with the GEP op in the
  LLVM dialect.

Reviewed By: mravishankar

Differential Revision: https://reviews.llvm.org/D86109
---
 .../SPIRVToLLVM/ConvertSPIRVToLLVM.cpp        | 117 ++++++++++++++----
 .../SPIRVToLLVM/memory-ops-to-llvm.mlir       |  25 ++++
 .../SPIRVToLLVM/misc-ops-to-llvm.mlir         |  17 +++
 .../spirv-types-to-llvm-invalid.mlir          |  11 +-
 .../SPIRVToLLVM/spirv-types-to-llvm.mlir      |   8 +-
 5 files changed, 141 insertions(+), 37 deletions(-)

diff --git a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
index e7c5b3c9f6dcd4..9c2ba26274e9ab 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/SPIRV/LayoutUtils.h"
 #include "mlir/Dialect/SPIRV/SPIRVDialect.h"
 #include "mlir/Dialect/SPIRV/SPIRVOps.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
@@ -179,6 +180,22 @@ static Value processCountOrOffset(Location loc, Value value, Type srcType,
   return optionallyTruncateOrExtend(loc, broadcasted, dstType, rewriter);
 }

+/// Converts SPIR-V struct with a regular (according to `VulkanLayoutUtils`)
+/// offset to LLVM struct. Otherwise, the conversion is not supported.
+static Optional<Type>
+convertStructTypeWithOffset(spirv::StructType type,
+                            LLVMTypeConverter &converter) {
+  if (type != VulkanLayoutUtils::decorateType(type))
+    return llvm::None;
+
+  auto elementsVector = llvm::to_vector<8>(
+      llvm::map_range(type.getElementTypes(), [&](Type elementType) {
+        return converter.convertType(elementType).cast<LLVM::LLVMType>();
+      }));
+  return LLVM::LLVMType::getStructTy(type.getContext(), elementsVector,
+                                     /*isPacked=*/false);
+}
+
 /// Converts SPIR-V struct with no offset to packed LLVM struct.
 static Type convertStructTypePacked(spirv::StructType type,
                                     LLVMTypeConverter &converter) {
@@ -223,16 +240,22 @@ static LogicalResult replaceWithLoadOrStore(Operation *op,
 // Type conversion
 //===----------------------------------------------------------------------===//

-/// Converts SPIR-V array type to LLVM array. There is no modelling of array
-/// stride at the moment.
+/// Converts SPIR-V array type to LLVM array. Natural stride (according to
+/// `VulkanLayoutUtils`) is also mapped to LLVM array. This has to be respected
+/// when converting ops that manipulate array types.
static Optional convertArrayType(spirv::ArrayType type, TypeConverter &converter) { - if (type.getArrayStride() != 0) + unsigned stride = type.getArrayStride(); + Type elementType = type.getElementType(); + auto sizeInBytes = elementType.cast().getSizeInBytes(); + if (stride != 0 && + !(sizeInBytes.hasValue() && sizeInBytes.getValue() == stride)) return llvm::None; - auto elementType = - converter.convertType(type.getElementType()).cast(); + + auto llvmElementType = + converter.convertType(elementType).cast(); unsigned numElements = type.getNumElements(); - return LLVM::LLVMType::getArrayTy(elementType, numElements); + return LLVM::LLVMType::getArrayTy(llvmElementType, numElements); } /// Converts SPIR-V pointer type to LLVM pointer. Pointer's storage class is not @@ -257,13 +280,15 @@ static Optional convertRuntimeArrayType(spirv::RuntimeArrayType type, } /// Converts SPIR-V struct to LLVM struct. There is no support of structs with -/// member decorations or with offset. +/// member decorations. Also, only natural offset is supported. static Optional convertStructType(spirv::StructType type, LLVMTypeConverter &converter) { SmallVector memberDecorations; type.getMemberDecorations(memberDecorations); - if (type.hasOffset() || !memberDecorations.empty()) + if (!memberDecorations.empty()) return llvm::None; + if (type.hasOffset()) + return convertStructTypeWithOffset(type, converter); return convertStructTypePacked(type, converter); } @@ -273,6 +298,31 @@ static Optional convertStructType(spirv::StructType type, namespace { +class AccessChainPattern : public SPIRVToLLVMConversion { +public: + using SPIRVToLLVMConversion::SPIRVToLLVMConversion; + + LogicalResult + matchAndRewrite(spirv::AccessChainOp op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto dstType = typeConverter.convertType(op.component_ptr().getType()); + if (!dstType) + return failure(); + // To use GEP we need to add a first 0 index to go through the pointer. + auto indices = llvm::to_vector<4>(op.indices()); + Type indexType = op.indices().front().getType(); + auto llvmIndexType = typeConverter.convertType(indexType); + if (!llvmIndexType) + return failure(); + Value zero = rewriter.create( + op.getLoc(), llvmIndexType, rewriter.getIntegerAttr(indexType, 0)); + indices.insert(indices.begin(), zero); + rewriter.replaceOpWithNewOp(op, dstType, op.base_ptr(), + indices); + return success(); + } +}; + class AddressOfPattern : public SPIRVToLLVMConversion { public: using SPIRVToLLVMConversion::SPIRVToLLVMConversion; @@ -545,11 +595,14 @@ class GlobalVariablePattern if (!dstType) return failure(); - // Limit conversion to the current invocation only for now. + // Limit conversion to the current invocation only or `StorageBuffer` + // required by SPIR-V runner. + // This is okay because multiple invocations are not supported yet. auto storageClass = srcType.getStorageClass(); if (storageClass != spirv::StorageClass::Input && storageClass != spirv::StorageClass::Private && - storageClass != spirv::StorageClass::Output) { + storageClass != spirv::StorageClass::Output && + storageClass != spirv::StorageClass::StorageBuffer) { return failure(); } @@ -757,6 +810,20 @@ class NotPattern : public SPIRVToLLVMConversion { } }; +/// A template pattern that erases the given `SPIRVOp`. 
+template <typename SPIRVOp>
+class ErasePattern : public SPIRVToLLVMConversion<SPIRVOp> {
+public:
+  using SPIRVToLLVMConversion<SPIRVOp>::SPIRVToLLVMConversion;
+
+  LogicalResult
+  matchAndRewrite(SPIRVOp op, ArrayRef<Value> operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
 class ReturnPattern : public SPIRVToLLVMConversion<spirv::ReturnOp> {
 public:
   using SPIRVToLLVMConversion<spirv::ReturnOp>::SPIRVToLLVMConversion;
@@ -875,18 +942,6 @@ class LoopPattern : public SPIRVToLLVMConversion<spirv::LoopOp> {
   }
 };
 
-class MergePattern : public SPIRVToLLVMConversion<spirv::MergeOp> {
-public:
-  using SPIRVToLLVMConversion<spirv::MergeOp>::SPIRVToLLVMConversion;
-
-  LogicalResult
-  matchAndRewrite(spirv::MergeOp op, ArrayRef<Value> operands,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.eraseOp(op);
-    return success();
-  }
-};
-
 /// Converts `spv.selection` with `spv.BranchConditional` in its header block.
 /// All blocks within selection should be reachable for conversion to succeed.
 class SelectionPattern : public SPIRVToLLVMConversion<spirv::SelectionOp> {
@@ -1266,11 +1321,18 @@ void mlir::populateSPIRVToLLVMConversionPatterns(
       ConstantScalarAndVectorPattern,
 
       // Control Flow ops
-      BranchConversionPattern, BranchConditionalConversionPattern, LoopPattern,
-      SelectionPattern, MergePattern,
+      BranchConversionPattern, BranchConditionalConversionPattern,
+      FunctionCallPattern, LoopPattern, SelectionPattern,
+      ErasePattern<spirv::MergeOp>,
+
+      // Entry points and execution mode
+      // Module generated from SPIR-V could have other "internal" functions, so
+      // having entry point and execution mode metadata can be useful. For now,
+      // simply remove them.
+      // TODO: Support EntryPoint/ExecutionMode properly.
+      ErasePattern<spirv::EntryPointOp>, ErasePattern<spirv::ExecutionModeOp>,
 
-      // Function Call op
-      FunctionCallPattern,
 
       // GLSL extended instruction set ops
       DirectConversionPattern<spirv::GLSLCeilOp, LLVM::FCeilOp>,
@@ -1295,8 +1357,9 @@ void mlir::populateSPIRVToLLVMConversionPatterns(
       NotPattern<spirv::LogicalNotOp>,
 
       // Memory ops
-      AddressOfPattern, GlobalVariablePattern, LoadStorePattern,
-      LoadStorePattern, VariablePattern,
+      AccessChainPattern, AddressOfPattern, GlobalVariablePattern,
+      LoadStorePattern<spirv::LoadOp, LLVM::LoadOp>,
+      LoadStorePattern<spirv::StoreOp, LLVM::StoreOp>, VariablePattern,
 
       // Miscellaneous ops
       DirectConversionPattern<spirv::SelectOp, LLVM::SelectOp>,
diff --git a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir
index 51a734c462a36c..4402a513fb93e8 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/memory-ops-to-llvm.mlir
@@ -1,5 +1,30 @@
 // RUN: mlir-opt -convert-spirv-to-llvm %s | FileCheck %s
 
+//===----------------------------------------------------------------------===//
+// spv.AccessChain
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: @access_chain
+func @access_chain() -> () {
+  // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : !llvm.i32
+  %0 = spv.constant 1: i32
+  %1 = spv.Variable : !spv.ptr<!spv.struct<f32, !spv.struct<f32, f32>>, Function>
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+  // CHECK: llvm.getelementptr %{{.*}}[%[[ZERO]], %[[ONE]], %[[ONE]]] : (!llvm.ptr<struct<packed (float, struct<packed (float, float)>)>>, !llvm.i32, !llvm.i32, !llvm.i32) -> !llvm.ptr<float>
+  %2 = spv.AccessChain %1[%0, %0] : !spv.ptr<!spv.struct<f32, !spv.struct<f32, f32>>, Function>, i32, i32
+  return
+}
+
+// CHECK-LABEL: @access_chain_array
+func @access_chain_array(%arg0 : i32) -> () {
+  %0 = spv.Variable : !spv.ptr<!spv.array<4x!spv.array<4xf32>>, Function>
+  // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32
+  // CHECK: llvm.getelementptr %{{.*}}[%[[ZERO]], %{{.*}}] : (!llvm.ptr<array<4 x array<4 x float>>>, !llvm.i32, !llvm.i32) -> !llvm.ptr<array<4 x float>>
+  %1 = spv.AccessChain %0[%arg0] : !spv.ptr<!spv.array<4x!spv.array<4xf32>>, Function>, i32
+  %2 =
spv.Load "Function" %1 ["Volatile"] : !spv.array<4xf32> + return +} + //===----------------------------------------------------------------------===// // spv.globalVariable and spv._address_of //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index 2e74485323ede2..d54b91668cd972 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -20,6 +20,23 @@ func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) { return } +//===----------------------------------------------------------------------===// +// spv.EntryPoint and spv.ExecutionMode +//===----------------------------------------------------------------------===// + +// CHECK: module { +// CHECK-NEXT: llvm.func @empty +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } +// CHECK-NEXT: } +spv.module Logical GLSL450 { + spv.func @empty() -> () "None" { + spv.Return + } + spv.EntryPoint "GLCompute" @empty + spv.ExecutionMode @empty "LocalSize", 1, 1, 1 +} + //===----------------------------------------------------------------------===// // spv.Undef //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir index 96fb9f44af5783..87f0bd8d829808 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm-invalid.mlir @@ -1,21 +1,14 @@ // RUN: mlir-opt %s -convert-spirv-to-llvm -verify-diagnostics -split-input-file // expected-error@+1 {{failed to legalize operation 'spv.func' that was explicitly marked illegal}} -spv.func @array_with_stride(%arg: !spv.array<4 x f32, stride=4>) -> () "None" { +spv.func @array_with_unnatural_stride(%arg: !spv.array<4 x f32, stride=8>) -> () "None" { spv.Return } // ----- // expected-error@+1 {{failed to legalize operation 'spv.func' that was explicitly marked illegal}} -spv.func @struct_with_offset1(%arg: !spv.struct) -> () "None" { - spv.Return -} - -// ----- - -// expected-error@+1 {{failed to legalize operation 'spv.func' that was explicitly marked illegal}} -spv.func @struct_with_offset2(%arg: !spv.struct) -> () "None" { +spv.func @struct_with_unnatural_offset(%arg: !spv.struct) -> () "None" { spv.Return } diff --git a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir index d6618a7de7fed0..454b5b314f88a3 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/spirv-types-to-llvm.mlir @@ -5,7 +5,10 @@ //===----------------------------------------------------------------------===// // CHECK-LABEL: @array(!llvm.array<16 x float>, !llvm.array<32 x vec<4 x float>>) -func @array(!spv.array<16xf32>, !spv.array< 32 x vector<4xf32> >) -> () +func @array(!spv.array<16 x f32>, !spv.array< 32 x vector<4xf32> >) -> () + +// CHECK-LABEL: @array_with_natural_stride(!llvm.array<16 x float>) +func @array_with_natural_stride(!spv.array<16 x f32, stride=4>) -> () //===----------------------------------------------------------------------===// // Pointer type @@ -36,3 +39,6 @@ func @struct(!spv.struct) -> () // CHECK-LABEL: @struct_nested(!llvm.struct)>) func @struct_nested(!spv.struct>) + +// CHECK-LABEL: 
@struct_with_natural_offset(!llvm.struct<(i8, i32)>) +func @struct_with_natural_offset(!spv.struct) -> () From 31f02ac60aa8e89c04617e82fa2b1140e33e824d Mon Sep 17 00:00:00 2001 From: Sam Tebbs Date: Mon, 17 Aug 2020 16:03:55 +0100 Subject: [PATCH 057/101] [ARM] Use mov operand if the mov cannot be moved while tail predicating There are some cases where the instruction that sets up the iteration count for a tail predicated loop cannot be moved before the dlstp, stopping tail predication entirely. This patch checks if the mov operand can be used and if so, uses that instead. Differential Revision: https://reviews.llvm.org/D86087 --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 37 ++- .../LowOverheadLoops/mov-after-dlstp.mir | 269 ++++++++++++++++++ .../Thumb2/LowOverheadLoops/mov-operand.ll | 81 ++++++ 3 files changed, 375 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir create mode 100644 llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index be75d6bef08c4a..2e7cd412db1cc0 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -226,6 +226,7 @@ namespace { MachineInstr *Dec = nullptr; MachineInstr *End = nullptr; MachineInstr *VCTP = nullptr; + MachineOperand TPNumElements; SmallPtrSet SecondaryVCTPs; VPTBlock *CurrentBlock = nullptr; SetVector CurrentPredicate; @@ -239,7 +240,8 @@ namespace { LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI, const ARMBaseInstrInfo &TII) - : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII) { + : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII), + TPNumElements(MachineOperand::CreateImm(0)) { MF = ML.getHeader()->getParent(); if (auto *MBB = ML.getLoopPreheader()) Preheader = MBB; @@ -291,11 +293,10 @@ namespace { SmallVectorImpl &getVPTBlocks() { return VPTBlocks; } - // Return the loop iteration count, or the number of elements if we're tail - // predicating. - MachineOperand &getCount() { - return IsTailPredicationLegal() ? - VCTP->getOperand(1) : Start->getOperand(0); + // Return the operand for the loop start instruction. This will be the loop + // iteration count, or the number of elements if we're tail predicating. + MachineOperand &getLoopStartOperand() { + return IsTailPredicationLegal() ? TPNumElements : Start->getOperand(0); } unsigned getStartOpcode() const { @@ -453,7 +454,8 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { // of the iteration count, to the loop start instruction. The number of // elements is provided to the vctp instruction, so we need to check that // we can use this register at InsertPt. - Register NumElements = VCTP->getOperand(1).getReg(); + TPNumElements = VCTP->getOperand(1); + Register NumElements = TPNumElements.getReg(); // If the register is defined within loop, then we can't perform TP. // TODO: Check whether this is just a mov of a register that would be @@ -466,9 +468,8 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { // The element count register maybe defined after InsertPt, in which case we // need to try to move either InsertPt or the def so that the [w|d]lstp can // use the value. - // TODO: On failing to move an instruction, check if the count is provided by - // a mov and whether we can use the mov operand directly. 
MachineBasicBlock *InsertBB = StartInsertPt->getParent(); + if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) { if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) { if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) { @@ -482,9 +483,21 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { StartInsertPt); LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); } else { - LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop " - << "start instruction.\n"); - return false; + // If we fail to move an instruction and the element count is provided + // by a mov, use the mov operand if it will have the same value at the + // insertion point + MachineOperand Operand = ElemDef->getOperand(1); + if (isMovRegOpcode(ElemDef->getOpcode()) && + RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) == + RDA.getUniqueReachingMIDef(StartInsertPt, Operand.getReg())) { + TPNumElements = Operand; + NumElements = TPNumElements.getReg(); + } else { + LLVM_DEBUG(dbgs() + << "ARM Loops: Unable to move element count to loop " + << "start instruction.\n"); + return false; + } } } } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir new file mode 100644 index 00000000000000..9a5856335dfc67 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir @@ -0,0 +1,269 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops -tail-predication=enabled %s -o - | FileCheck %s + +--- | + define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) #0 { + entry: + %0 = add i32 %blockSize, 3 + %1 = icmp slt i32 %blockSize, 4 + %smin = select i1 %1, i32 %blockSize, i32 4 + %2 = sub i32 %0, %smin + %3 = lshr i32 %2, 2 + %4 = add nuw nsw i32 %3, 1 + %5 = icmp slt i32 %blockSize, 4 + %smin3 = select i1 %5, i32 %blockSize, i32 4 + %6 = sub i32 %0, %smin3 + %7 = lshr i32 %6, 2 + %8 = add nuw nsw i32 %7, 1 + call void @llvm.set.loop.iterations.i32(i32 %8) + br label %do.body.i + + do.body.i: ; preds = %do.body.i, %entry + %blkCnt.0.i = phi i32 [ %13, %do.body.i ], [ %blockSize, %entry ] + %sumVec.0.i = phi <4 x float> [ %12, %do.body.i ], [ zeroinitializer, %entry ] + %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %9 = phi i32 [ %8, %entry ], [ %14, %do.body.i ] + %pSrc.addr.0.i2 = bitcast float* %pSrc.addr.0.i to <4 x float>* + %10 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) + %11 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.0.i2, i32 4, <4 x i1> %10, <4 x float> zeroinitializer) + %12 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %11, <4 x i1> %10, <4 x float> %sumVec.0.i) + %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %13 = add i32 %blkCnt.0.i, -4 + %14 = call i32 @llvm.loop.decrement.reg.i32(i32 %9, i32 1) + %15 = icmp ne i32 %14, 0 + br i1 %15, label %do.body.i, label %arm_mean_f32_mve.exit + + arm_mean_f32_mve.exit: ; preds = %do.body.i + %16 = extractelement <4 x float> %12, i32 3 + %add2.i.i = fadd fast float %16, %16 + %conv.i = uitofp i32 %blockSize to float + %div.i = fdiv fast float %add2.i.i, %conv.i + %17 = bitcast float %div.i to i32 + %18 = insertelement <4 x i32> undef, i32 %17, i64 0 + %19 = shufflevector <4 x 
i32> %18, <4 x i32> undef, <4 x i32> zeroinitializer + %20 = bitcast <4 x i32> %19 to <4 x float> + call void @llvm.set.loop.iterations.i32(i32 %4) + br label %do.body + + do.body: ; preds = %do.body, %arm_mean_f32_mve.exit + %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %26, %do.body ] + %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %25, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %21 = phi i32 [ %4, %arm_mean_f32_mve.exit ], [ %27, %do.body ] + %pSrc.addr.01 = bitcast float* %pSrc.addr.0 to <4 x float>* + %22 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %23 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pSrc.addr.01, i32 4, <4 x i1> %22, <4 x float> zeroinitializer) + %24 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %23, <4 x float> %20, <4 x i1> %22, <4 x float> undef) + %25 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %24, <4 x float> %24, <4 x float> %sumVec.0, <4 x i1> %22) + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %26 = add i32 %blkCnt.0, -4 + %27 = call i32 @llvm.loop.decrement.reg.i32(i32 %21, i32 1) + %28 = icmp ne i32 %27, 0 + br i1 %28, label %do.body, label %do.end + + do.end: ; preds = %do.body + %29 = extractelement <4 x float> %25, i32 3 + %add2.i = fadd fast float %29, %29 + %sub2 = add i32 %blockSize, -1 + %conv = uitofp i32 %sub2 to float + %div = fdiv fast float %add2.i, %conv + store float %div, float* %pResult, align 4 + ret void + } + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1 + + ; Function Attrs: nounwind readnone + declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1 + + ; Function Attrs: argmemonly nounwind readonly willreturn + declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2 + + ; Function Attrs: nounwind readnone + declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1 + + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #3 + + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #3 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { nounwind readnone "target-features"="+mve.fp" } + attributes #2 = { argmemonly nounwind readonly willreturn "target-features"="+mve.fp" } + attributes #3 = { noduplicate nounwind } + +... 
+--- +name: arm_var_f32_mve +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 8 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: arm_var_f32_mve + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $lr, $r0, $r1, $r2, $r4 + ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -8 + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: $r12 = tMOVr $r0, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r3 + ; CHECK: bb.1.do.body.i: + ; CHECK: successors: %bb.1(0x7c000000), %bb.2(0x04000000) + ; CHECK: liveins: $lr, $q0, $r0, $r1, $r2, $r12 + ; CHECK: renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 0, killed $noreg, killed renamable $q0 + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.1 + ; CHECK: bb.2.arm_mean_f32_mve.exit: + ; CHECK: successors: %bb.3(0x80000000) + ; CHECK: liveins: $q0, $r0, $r1, $r2 + ; CHECK: $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg + ; CHECK: $lr = MVE_DLSTP_32 $r1 + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg + ; CHECK: renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 + ; CHECK: $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + ; CHECK: bb.3.do.body: + ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) + ; CHECK: liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 + ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable 
$r3, 4, 14 /* CC::al */, $noreg + ; CHECK: renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.pSrc.addr.01, align 4) + ; CHECK: renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 0, $noreg, undef renamable $q2 + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, killed renamable $q2, 0, killed $noreg + ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.3 + ; CHECK: bb.4.do.end: + ; CHECK: liveins: $q0, $r1, $r2 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, killed renamable $s3, 14 /* CC::al */, $noreg, implicit killed $q0 + ; CHECK: $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg + ; CHECK: renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg + ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $r0, $r1, $r2, $r4, $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r4, -8 + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 10, 8, implicit-def $itstate + renamable $r3 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r3, implicit killed $itstate + renamable $r12 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg + renamable $r3, dead $cpsr = tSUBrr renamable $r1, killed renamable $r3, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $r3, dead $cpsr = tADDi8 killed renamable $r3, 3, 14 /* CC::al */, $noreg + renamable $lr = nuw nsw t2ADDrs killed renamable $r12, killed renamable $r3, 19, 14 /* CC::al */, $noreg, $noreg + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + $r12 = tMOVr $r0, 14 /* CC::al */, $noreg + t2DoLoopStart renamable $lr + $r4 = tMOVr $lr, 14 /* CC::al */, $noreg + + bb.1.do.body.i: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r12 + + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + MVE_VPST 4, implicit $vpr + renamable $r12, renamable $q1 = MVE_VLDRWU32_post killed renamable $r12, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.0.i2, align 4) + renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, renamable $q0 + t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr + tB %bb.2, 14 /* CC::al */, $noreg + + bb.2.arm_mean_f32_mve.exit: + successors: %bb.3(0x80000000) + liveins: $q0, $r0, $r1, $r2, $r4 + + $s4 = VMOVSR $r1, 14 /* CC::al */, $noreg + $lr = tMOVr $r4, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, 
implicit $q0 + t2DoLoopStart killed $r4 + renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg + renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg + renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, undef renamable $q0 + renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, undef renamable $q1 + $r3 = tMOVr $r1, 14 /* CC::al */, $noreg + + bb.3.do.body: + successors: %bb.3(0x7c000000), %bb.4(0x04000000) + liveins: $lr, $q0, $q1, $r0, $r1, $r2, $r3 + + renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg + renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + MVE_VPST 2, implicit $vpr + renamable $r0, renamable $q2 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr :: (load 16 from %ir.pSrc.addr.01, align 4) + renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VSUBf32 killed renamable $q2, renamable $q1, 1, renamable $vpr, undef renamable $q2 + renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q2, renamable $q2, 1, killed renamable $vpr + t2LoopEnd renamable $lr, %bb.3, implicit-def dead $cpsr + tB %bb.4, 14 /* CC::al */, $noreg + + bb.4.do.end: + liveins: $q0, $r1, $r2 + + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0 + $s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg + renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg + VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store 4 into %ir.pResult) + frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc + +... 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll new file mode 100644 index 00000000000000..63a2c0233f6e3e --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-operand.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s +define arm_aapcs_vfpcc void @arm_var_f32_mve(float* %pSrc, i32 %blockSize, float* nocapture %pResult) { +; CHECK-LABEL: .LBB0_1: @ %do.body.i +; CHECK: dlstp.32 lr, r1 +; CHECK-NEXT: vadd.f32 s0, s3, s3 +; CHECK-NEXT: vcvt.f32.u32 s4, s4 +; CHECK-NEXT: vdiv.f32 s0, s0, s4 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vdup.32 q1, r3 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: .LBB0_3: @ %do.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vldrw.u32 q2, [r0], #16 +; CHECK-NEXT: vsub.f32 q2, q2, q1 +; CHECK-NEXT: vfma.f32 q0, q2, q2 +; CHECK-NEXT: letp lr, .LBB0_3 +entry: + br label %do.body.i + +do.body.i: ; preds = %entry, %do.body.i + %blkCnt.0.i = phi i32 [ %sub.i, %do.body.i ], [ %blockSize, %entry ] + %sumVec.0.i = phi <4 x float> [ %3, %do.body.i ], [ zeroinitializer, %entry ] + %pSrc.addr.0.i = phi float* [ %add.ptr.i, %do.body.i ], [ %pSrc, %entry ] + %0 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0.i) + %1 = bitcast float* %pSrc.addr.0.i to <4 x float>* + %2 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %1, i32 4, <4 x i1> %0, <4 x float> zeroinitializer) + %3 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0.i, <4 x float> %2, <4 x i1> %0, <4 x float> %sumVec.0.i) + %sub.i = add nsw i32 %blkCnt.0.i, -4 + %add.ptr.i = getelementptr inbounds float, float* %pSrc.addr.0.i, i32 4 + %cmp.i = icmp sgt i32 %blkCnt.0.i, 4 + br i1 %cmp.i, label %do.body.i, label %arm_mean_f32_mve.exit + +arm_mean_f32_mve.exit: ; preds = %do.body.i + %4 = extractelement <4 x float> %3, i32 3 + %add2.i.i = fadd fast float %4, %4 + %conv.i = uitofp i32 %blockSize to float + %div.i = fdiv fast float %add2.i.i, %conv.i + %.splatinsert = insertelement <4 x float> undef, float %div.i, i32 0 + %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer + br label %do.body + +do.body: ; preds = %do.body, %arm_mean_f32_mve.exit + %blkCnt.0 = phi i32 [ %blockSize, %arm_mean_f32_mve.exit ], [ %sub, %do.body ] + %sumVec.0 = phi <4 x float> [ zeroinitializer, %arm_mean_f32_mve.exit ], [ %9, %do.body ] + %pSrc.addr.0 = phi float* [ %pSrc, %arm_mean_f32_mve.exit ], [ %add.ptr, %do.body ] + %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0) + %6 = bitcast float* %pSrc.addr.0 to <4 x float>* + %7 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %6, i32 4, <4 x i1> %5, <4 x float> zeroinitializer) + %8 = tail call fast <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> %7, <4 x float> %.splat, <4 x i1> %5, <4 x float> undef) + %9 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %8, <4 x float> %8, <4 x float> %sumVec.0, <4 x i1> %5) + %sub = add nsw i32 %blkCnt.0, -4 + %add.ptr = getelementptr inbounds float, float* %pSrc.addr.0, i32 4 + %cmp1 = icmp sgt i32 %blkCnt.0, 4 + br i1 %cmp1, label %do.body, label %do.end + +do.end: ; preds = %do.body + %10 = extractelement <4 x 
float> %9, i32 3
+  %add2.i = fadd fast float %10, %10
+  %sub2 = add i32 %blockSize, -1
+  %conv = uitofp i32 %sub2 to float
+  %div = fdiv fast float %add2.i, %conv
+  br label %cleanup
+
+cleanup:                                          ; preds = %entry, %do.end
+  store float %div, float* %pResult, align 4
+  ret void
+}
+
+declare <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)
+
+declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>)
+
+declare <4 x i1> @llvm.arm.mve.vctp32(i32)
+
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
+
+declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>)

From 7baed769c7ea8de27a1c077c7ff30f4e19988ade Mon Sep 17 00:00:00 2001
From: Jan Kratochvil 
Date: Tue, 18 Aug 2020 18:09:55 +0200
Subject: [PATCH 058/101] [lldb] [testsuite] Add split-file for check-lldb
 dependencies

D85968 started to use `split-file`, and while buildbots run fine, when
doing `make check-lldb` by hand I get:

  .../llvm-monorepo-clangassert/tools/lldb/test/SymbolFile/DWARF/Output/DW_AT_declaration-with-children.s.script: line 2: split-file: command not found
  failed: lldb-shell :: SymbolFile/DWARF/DW_AT_declaration-with-children.s

Differential Revision: https://reviews.llvm.org/D86144

---
 lldb/test/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt
index c0249180253abe..21d8c61f11ed58 100644
--- a/lldb/test/CMakeLists.txt
+++ b/lldb/test/CMakeLists.txt
@@ -77,6 +77,7 @@ if(NOT LLDB_BUILT_STANDALONE)
     dsymutil
     llvm-strip
     not
+    split-file
    yaml2obj
    )
 endif()

From 3471520b1f6bc4fedfe45505f02924dc44e5106f Mon Sep 17 00:00:00 2001
From: David Green 
Date: Tue, 18 Aug 2020 17:15:45 +0100
Subject: [PATCH 059/101] [ARM] Allow tail predication of VLDn

VLD2/4 instructions cannot be predicated, so we cannot tail predicate
them from autovec. From intrinsics though, they should be valid as they
will just end up loading extra values into off vector lanes, not
affecting the on lanes. The same is true for loads in general where, so
long as we are not using the other vector lanes, an unpredicated load
can be converted to a predicated one.

This marks VLD2 and VLD4 instructions as validForTailPredication and
allows any unpredicated load in a tail predicated loop, which seems to
be valid given the other checks we have.
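
For illustration, this is the kind of source pattern it enables (a
sketch in MVE intrinsics, not one of the tests below; the function name
and shapes are made up). The vld2q has no predicated form, but the tail
lanes it over-reads are never used, because the store is predicated on
the vctp:

  #include <arm_mve.h>

  // Sketch: pairwise add of interleaved data, n not a multiple of 8.
  // The vld2q is unpredicated; lanes past n are loaded but never stored,
  // since vst1q_p is predicated on vctp16q(n - i).
  void pairwise_add(const int16_t *src, int16_t *dst, int n) {
    for (int i = 0; i < n; i += 8) {
      mve_pred16_t p = vctp16q((uint32_t)(n - i)); // tail predicate
      int16x8x2_t v = vld2q_s16(src + 2 * i);      // unpredicated VLD2
      vst1q_p_s16(dst + i, vaddq_s16(v.val[0], v.val[1]), p);
    }
  }
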
Differential Revision: https://reviews.llvm.org/D86022 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 1 + llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 6 +- .../Thumb2/LowOverheadLoops/unpredload.ll | 40 +++--------- .../unittests/Target/ARM/MachineInstrTest.cpp | 62 +++++++++++++++---- 4 files changed, 64 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index c4ce13677b309f..eda41e8eef065d 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5829,6 +5829,7 @@ class MVE_vldst24_base stage, bits<2> size, let mayLoad = load; let mayStore = !eq(load,0); let hasSideEffects = 0; + let validForTailPredication = load; } // A parameter class used to encapsulate all the ways the writeback diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 2e7cd412db1cc0..11e8aa742d89b3 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -782,7 +782,7 @@ bool LowOverheadLoop::ValidateLiveOuts() { // the false lanes are zeroed and here we're trying to track that those false // lanes remain zero, or where they change, the differences are masked away // by their user(s). - // All MVE loads and stores have to be predicated, so we know that any load + // All MVE stores have to be predicated, so we know that any predicate load // operands, or stored results are equivalent already. Other explicitly // predicated instructions will perform the same operation in the original // loop and the tail-predicated form too. Because of this, we can insert @@ -1038,8 +1038,8 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { } // If the instruction is already explicitly predicated, then the conversion - // will be fine, but ensure that all memory operations are predicated. - return !IsUse && MI->mayLoadOrStore() ? false : true; + // will be fine, but ensure that all store operations are predicated. + return !IsUse && MI->mayStore() ? false : true; } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 3f48cc3ad59b2e..440080e4e142dc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -6,26 +6,17 @@ define void @arm_cmplx_mag_squared_q15_mve(i16* %pSrc, i16* %pDst, i32 %blockSiz ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: subs.w r12, r2, #8 -; CHECK-NEXT: mov.w r3, #-1 -; CHECK-NEXT: csinv r3, r3, r12, pl -; CHECK-NEXT: add.w r12, r3, r2 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vld20.16 {q0, q1}, [r0] -; CHECK-NEXT: vctp.16 r2 -; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vld21.16 {q0, q1}, [r0]! 
-; CHECK-NEXT: vpstttt -; CHECK-NEXT: vmulht.s16 q2, q1, q1 -; CHECK-NEXT: vmulht.s16 q0, q0, q0 -; CHECK-NEXT: vqaddt.s16 q0, q0, q2 -; CHECK-NEXT: vshrt.s16 q0, q0, #1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r1], #16 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: vmulh.s16 q2, q1, q1 +; CHECK-NEXT: vmulh.s16 q0, q0, q0 +; CHECK-NEXT: vqadd.s16 q0, q0, q2 +; CHECK-NEXT: vshr.s16 q0, q0, #1 +; CHECK-NEXT: vstrh.16 q0, [r1], #16 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: pop {r7, pc} entry: @@ -148,25 +139,14 @@ define i32 @good2(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n ; CHECK-LABEL: good2: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: mov r3, r2 -; CHECK-NEXT: cmp r2, #4 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #4 -; CHECK-NEXT: subs r3, r2, r3 -; CHECK-NEXT: add.w r12, r3, #3 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w lr, r3, r12, lsr #2 ; CHECK-NEXT: mov.w r12, #0 -; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB3_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vpst -; CHECK-NEXT: vmlavat.s32 r12, q1, q0 -; CHECK-NEXT: le lr, .LBB3_1 +; CHECK-NEXT: vmlava.s32 r12, q1, q0 +; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: pop {r7, pc} diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp index 792a15dcbfafd3..876e011e1ce8a6 100644 --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -382,7 +382,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { return false; case MVE_ASRLi: case MVE_ASRLr: - case MVE_LSRL: + case MVE_LSRL: case MVE_SQRSHR: case MVE_SQSHL: case MVE_SRSHR: @@ -393,7 +393,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VABDf32: case MVE_VABDs16: case MVE_VABDs32: - case MVE_VABDs8: + case MVE_VABDs8: case MVE_VABDu16: case MVE_VABDu32: case MVE_VABDu8: @@ -609,6 +609,42 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VIWDUPu16: case MVE_VIWDUPu32: case MVE_VIWDUPu8: + case MVE_VLD20_8: + case MVE_VLD21_8: + case MVE_VLD20_16: + case MVE_VLD21_16: + case MVE_VLD20_32: + case MVE_VLD21_32: + case MVE_VLD20_8_wb: + case MVE_VLD21_8_wb: + case MVE_VLD20_16_wb: + case MVE_VLD21_16_wb: + case MVE_VLD20_32_wb: + case MVE_VLD21_32_wb: + case MVE_VLD40_8: + case MVE_VLD41_8: + case MVE_VLD42_8: + case MVE_VLD43_8: + case MVE_VLD40_16: + case MVE_VLD41_16: + case MVE_VLD42_16: + case MVE_VLD43_16: + case MVE_VLD40_32: + case MVE_VLD41_32: + case MVE_VLD42_32: + case MVE_VLD43_32: + case MVE_VLD40_8_wb: + case MVE_VLD41_8_wb: + case MVE_VLD42_8_wb: + case MVE_VLD43_8_wb: + case MVE_VLD40_16_wb: + case MVE_VLD41_16_wb: + case MVE_VLD42_16_wb: + case MVE_VLD43_16_wb: + case MVE_VLD40_32_wb: + case MVE_VLD41_32_wb: + case MVE_VLD42_32_wb: + case MVE_VLD43_32_wb: case MVE_VLDRBS16: case MVE_VLDRBS16_post: case MVE_VLDRBS16_pre: @@ -657,9 +693,9 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VLDRWU32_rq_u: case MVE_VMOVimmf32: case MVE_VMOVimmi16: - case MVE_VMOVimmi32: + case MVE_VMOVimmi32: case MVE_VMOVimmi64: - case MVE_VMOVimmi8: + case MVE_VMOVimmi8: case MVE_VMOVNi16bh: case MVE_VMOVNi16th: case MVE_VMOVNi32bh: @@ -679,7 +715,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { 
case MVE_VMULLTs8: case MVE_VMULLTu16: case MVE_VMULLTu32: - case MVE_VMULLTu8: + case MVE_VMULLTu8: case MVE_VMUL_qr_f16: case MVE_VMUL_qr_f32: case MVE_VMUL_qr_i16: @@ -702,7 +738,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VORR: case MVE_VORRimmi16: case MVE_VORRimmi32: - case MVE_VPST: + case MVE_VPST: case MVE_VQABSs16: case MVE_VQABSs32: case MVE_VQABSs8: @@ -814,7 +850,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VRHADDs32: case MVE_VRHADDs8: case MVE_VRHADDu16: - case MVE_VRHADDu32: + case MVE_VRHADDu32: case MVE_VRHADDu8: case MVE_VRINTf16A: case MVE_VRINTf16M: @@ -825,12 +861,12 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VRINTf32A: case MVE_VRINTf32M: case MVE_VRINTf32N: - case MVE_VRINTf32P: - case MVE_VRINTf32X: + case MVE_VRINTf32P: + case MVE_VRINTf32X: case MVE_VRINTf32Z: case MVE_VRSHL_by_vecs16: case MVE_VRSHL_by_vecs32: - case MVE_VRSHL_by_vecs8: + case MVE_VRSHL_by_vecs8: case MVE_VRSHL_by_vecu16: case MVE_VRSHL_by_vecu32: case MVE_VRSHL_by_vecu8: @@ -887,7 +923,7 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { case MVE_VSTRB16_rq: case MVE_VSTRB32: case MVE_VSTRB32_post: - case MVE_VSTRB32_pre: + case MVE_VSTRB32_pre: case MVE_VSTRB32_rq: case MVE_VSTRB8_rq: case MVE_VSTRBU8: @@ -957,7 +993,9 @@ TEST(MachineInstrValidTailPredication, IsCorrect) { for (auto &Op : Desc.operands()) { // Only check instructions that access the MQPR regs. if ((Op.OperandType & MCOI::OPERAND_REGISTER) == 0 || - Op.RegClass != ARM::MQPRRegClassID) + (Op.RegClass != ARM::MQPRRegClassID && + Op.RegClass != ARM::QQPRRegClassID && + Op.RegClass != ARM::QQQQPRRegClassID)) continue; uint64_t Flags = MII->get(i).TSFlags; From ca77ab494aa29f7521ff797d230cd1b36cbe4e62 Mon Sep 17 00:00:00 2001 From: "Mott, Jeffrey T" Date: Fri, 17 Jul 2020 09:50:08 -0700 Subject: [PATCH 060/101] Disable use of _ExtInt with '__atomic' builtins We're (temporarily) disabling ExtInt for the '__atomic' builtins so we can better design their behavior later. The idea is until we do an audit/design for the way atomic builtins are supposed to work with _ExtInt, we should leave them restricted so they don't limit our future options, such as by binding us to a sub-optimal implementation via ABI. Example after this change: $ cat test.c void f(_ExtInt(64) *ptr) { __atomic_fetch_add(ptr, 1, 0); } $ clang -c test.c test.c:2:22: error: argument to atomic builtin of type '_ExtInt' is not supported __atomic_fetch_add(ptr, 1, 0); ^ 1 error generated. 
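
For reference, the power-of-two size check on the older `__sync`
builtins is unchanged; mirroring the Sema tests below, a case like this
still compiles:

  $ cat test2.c
  void g(_ExtInt(64) *ptr, int value) {
    __sync_fetch_and_add(ptr, value); // accepted: power-of-two size
  }
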
Differential Revision: https://reviews.llvm.org/D84049 --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 7 ++++--- clang/lib/Sema/SemaChecking.cpp | 5 +++++ clang/lib/Sema/SemaType.cpp | 5 +---- clang/test/Sema/builtins.c | 4 ++++ clang/test/SemaCXX/ext-int.cpp | 5 +++-- libcxx/test/libcxx/atomics/ext-int.verify.cpp | 11 +++++++++++ 6 files changed, 28 insertions(+), 9 deletions(-) create mode 100644 libcxx/test/libcxx/atomics/ext-int.verify.cpp diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index fd21285b1f7929..a63fae5b5f726c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6038,9 +6038,8 @@ def err_func_def_incomplete_result : Error< def err_atomic_specifier_bad_type : Error<"_Atomic cannot be applied to " "%select{incomplete |array |function |reference |atomic |qualified " - "|sizeless ||integer |integer }0type " - "%1 %select{|||||||which is not trivially copyable|with less than " - "1 byte of precision|with a non power of 2 precision}0">; + "|sizeless ||integer }0type " + "%1 %select{|||||||which is not trivially copyable|}0">; // Expressions. def ext_sizeof_alignof_function_type : Extension< @@ -7967,6 +7966,8 @@ def err_atomic_exclusive_builtin_pointer_size : Error< " 1,2,4 or 8 byte type (%0 invalid)">; def err_atomic_builtin_ext_int_size : Error< "Atomic memory operand must have a power-of-two size">; +def err_atomic_builtin_ext_int_prohibit : Error< + "argument to atomic builtin of type '_ExtInt' is not supported">; def err_atomic_op_needs_atomic : Error< "address argument to atomic operation must be a pointer to _Atomic " "type (%0 invalid)">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4efd62f58d2e6d..70d3a682fc7028 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -5050,6 +5050,11 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, ? 
0 : 1); + if (ValType->isExtIntType()) { + Diag(Ptr->getExprLoc(), diag::err_atomic_builtin_ext_int_prohibit); + return ExprError(); + } + return AE; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b2be31ac09904e..4ab5cc5fd8b989 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8963,11 +8963,8 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) { else if (!T.isTriviallyCopyableType(Context)) // Some other non-trivially-copyable type (probably a C++ class) DisallowedKind = 7; - else if (auto *ExtTy = T->getAs()) { - if (ExtTy->getNumBits() < 8) + else if (T->isExtIntType()) { DisallowedKind = 8; - else if (!llvm::isPowerOf2_32(ExtTy->getNumBits())) - DisallowedKind = 9; } if (DisallowedKind != -1) { diff --git a/clang/test/Sema/builtins.c b/clang/test/Sema/builtins.c index 4b445724f712a1..e4093edb5f0063 100644 --- a/clang/test/Sema/builtins.c +++ b/clang/test/Sema/builtins.c @@ -285,12 +285,16 @@ void test_ei_i42i(_ExtInt(42) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expected-error {{Atomic memory operand must have a power-of-two size}} + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_i64i(_ExtInt(64) *ptr, int value) { __sync_fetch_and_add(ptr, value); // expect success // expected-warning@+1 {{the semantics of this intrinsic changed with GCC version 4.4 - the newer semantics are provided here}} __sync_nand_and_fetch(ptr, value); // expect success + + __atomic_fetch_add(ptr, 1, 0); // expected-error {{argument to atomic builtin of type '_ExtInt' is not supported}} } void test_ei_ii42(int *ptr, _ExtInt(42) value) { diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp index 0f2a3b89be1f1c..a619cd2eb5de1a 100644 --- a/clang/test/SemaCXX/ext-int.cpp +++ b/clang/test/SemaCXX/ext-int.cpp @@ -91,10 +91,11 @@ typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; _Complex _ExtInt(3) Cmplx; // Reject cases of _Atomic: -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)'}} _Atomic _ExtInt(4) TooSmallAtomic; -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)'}} _Atomic _ExtInt(9) NotPow2Atomic; +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(128)'}} _Atomic _ExtInt(128) JustRightAtomic; // Test result types of Unary/Bitwise/Binary Operations: diff --git a/libcxx/test/libcxx/atomics/ext-int.verify.cpp b/libcxx/test/libcxx/atomics/ext-int.verify.cpp new file mode 100644 index 00000000000000..3f57437f43cc6f --- /dev/null +++ b/libcxx/test/libcxx/atomics/ext-int.verify.cpp @@ -0,0 +1,11 @@ +// REQUIRES: clang-11 + +#include + +int main(int, char**) +{ + // expected-error@atomic:*1 {{_Atomic cannot be applied to integer type '_ExtInt(32)'}} + std::atomic<_ExtInt(32)> x {42}; + + return 0; +} From c466c5fa7ee90f90c0e1d08777f1f085bb78a475 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 18 Aug 2020 09:20:05 -0700 Subject: [PATCH 061/101] [ARM] Fix build after D86087 --- 
 llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index 11e8aa742d89b3..4d1ab88fe3b2c8 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -1342,7 +1342,7 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
   MachineBasicBlock *MBB = InsertPt->getParent();
   bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
   unsigned Opc = LoLoop.getStartOpcode();
-  MachineOperand &Count = LoLoop.getCount();
+  MachineOperand &Count = LoLoop.getLoopStartOperand();
 
   MachineInstrBuilder MIB =
       BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc));

From 8f4859d35120b007c53ac075375d9d1791ec6c86 Mon Sep 17 00:00:00 2001
From: Mauricio Sifontes 
Date: Tue, 18 Aug 2020 16:47:06 +0000
Subject: [PATCH 062/101] Create Optimization Pass Wrapper for MLIR Reduce

Create a reduction pass that accepts an optimization pass as argument
and only replaces the golden module in the pipeline if the output of the
optimization pass is smaller than the input and still exhibits the
interesting behavior.

Add a -pass-test option to test individual passes in the MLIR Reduce
tool.

Reviewed By: jpienaar

Differential Revision: https://reviews.llvm.org/D84783

---
 mlir/include/mlir/Reducer/OptReductionPass.h  | 52 ++++++++++++++++++
 mlir/include/mlir/Reducer/Passes.td           |  7 ++-
 mlir/include/mlir/Reducer/ReductionTreePass.h |  2 +-
 mlir/test/mlir-reduce/dce-test.mlir           | 17 ++++++
 ...-tree-pass.mlir => multiple-function.mlir} |  2 +-
 .../{testcase-linux.mlir => simple-test.mlir} |  4 +-
 ...reducer-pass.mlir => single-function.mlir} |  2 +-
 mlir/tools/mlir-reduce/CMakeLists.txt         |  1 +
 mlir/tools/mlir-reduce/OptReductionPass.cpp   | 55 +++++++++++++++++++
 mlir/tools/mlir-reduce/mlir-reduce.cpp        | 23 ++++++--
 10 files changed, 154 insertions(+), 11 deletions(-)
 create mode 100644 mlir/include/mlir/Reducer/OptReductionPass.h
 create mode 100644 mlir/test/mlir-reduce/dce-test.mlir
 rename mlir/test/mlir-reduce/{reduction-tree-pass.mlir => multiple-function.mlir} (90%)
 rename mlir/test/mlir-reduce/{testcase-linux.mlir => simple-test.mlir} (81%)
 rename mlir/test/mlir-reduce/{test-reducer-pass.mlir => single-function.mlir} (52%)
 create mode 100644 mlir/tools/mlir-reduce/OptReductionPass.cpp

diff --git a/mlir/include/mlir/Reducer/OptReductionPass.h b/mlir/include/mlir/Reducer/OptReductionPass.h
new file mode 100644
index 00000000000000..2168ea2159506b
--- /dev/null
+++ b/mlir/include/mlir/Reducer/OptReductionPass.h
@@ -0,0 +1,52 @@
+//===- OptReductionPass.h - Optimization Reduction Pass Wrapper -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Opt Reduction Pass Wrapper. It creates a pass to run
+// any optimization pass within it and only replaces the output module with the
+// transformed version if it is smaller and interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_REDUCER_OPTREDUCTIONPASS_H
+#define MLIR_REDUCER_OPTREDUCTIONPASS_H
+
+#include "PassDetail.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Reducer/ReductionNode.h"
+#include "mlir/Reducer/ReductionTreePass.h"
+#include "mlir/Reducer/Tester.h"
+#include "mlir/Transforms/Passes.h"
+#include "llvm/Support/Debug.h"
+
+namespace mlir {
+
+class OptReductionPass : public OptReductionBase<OptReductionPass> {
+public:
+  OptReductionPass(const Tester *test, MLIRContext *context,
+                   std::unique_ptr<Pass> optPass);
+
+  OptReductionPass(const OptReductionPass &srcPass);
+
+  /// Runs the pass instance in the pass pipeline.
+  void runOnOperation() override;
+
+private:
+  // Points to the context to be used in the pass manager.
+  MLIRContext *context;
+
+  // This is used to test the interesting behavior of the transformed module.
+  const Tester *test;
+
+  // Points to the mlir-opt pass to be called.
+  std::unique_ptr<Pass> optPass;
+};
+
+} // end namespace mlir
+
+#endif
diff --git a/mlir/include/mlir/Reducer/Passes.td b/mlir/include/mlir/Reducer/Passes.td
index 4703dd746a7095..d3a934ef693345 100644
--- a/mlir/include/mlir/Reducer/Passes.td
+++ b/mlir/include/mlir/Reducer/Passes.td
@@ -17,7 +17,10 @@ include "mlir/Pass/PassBase.td"
 
 def ReductionTree : Pass<"reduction-tree", "ModuleOp"> {
   let summary = "A general reduction tree pass for the MLIR Reduce Tool";
-  let constructor = "mlir::createReductionTreePass()";
 }
 
-#endif // MLIR_REDUCE_PASSES
+def OptReduction : Pass<"opt-reduction-pass", "ModuleOp"> {
+  let summary = "A reduction pass wrapper for optimization passes";
+}
+
+#endif // MLIR_REDUCER_PASSES
diff --git a/mlir/include/mlir/Reducer/ReductionTreePass.h b/mlir/include/mlir/Reducer/ReductionTreePass.h
index 01104aa0429b02..d07a475e4f9948 100644
--- a/mlir/include/mlir/Reducer/ReductionTreePass.h
+++ b/mlir/include/mlir/Reducer/ReductionTreePass.h
@@ -34,7 +34,7 @@ enum TraversalMode { SinglePath, MultiPath, Concurrent, Backtrack };
 // class.
 class ReductionTreeUtils {
 public:
-  void updateGoldenModule(ModuleOp &golden, ModuleOp reduced);
+  static void updateGoldenModule(ModuleOp &golden, ModuleOp reduced);
 };
 
 /// This class defines the Reduction Tree Pass. It provides a framework to
diff --git a/mlir/test/mlir-reduce/dce-test.mlir b/mlir/test/mlir-reduce/dce-test.mlir
new file mode 100644
index 00000000000000..e368343e056a0a
--- /dev/null
+++ b/mlir/test/mlir-reduce/dce-test.mlir
@@ -0,0 +1,17 @@
+// UNSUPPORTED: -windows-
+// RUN: mlir-reduce %s -test %S/failure-test.sh -pass-test DCE | FileCheck %s
+// This input should be reduced by the pass pipeline so that only
+// the @simple1 function remains, as the other functions should be
+// removed by the dead code elimination pass.
+// CHECK-LABEL: func @simple1(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+
+// CHECK-NOT: func @dead_private_function
+func @dead_private_function() attributes { sym_visibility = "private" }
+
+// CHECK-NOT: func @dead_nested_function
+func @dead_nested_function() attributes { sym_visibility = "nested" }
+
+func @simple1(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
+  "test.crashOp" () : () -> ()
+  return
+}
diff --git a/mlir/test/mlir-reduce/reduction-tree-pass.mlir b/mlir/test/mlir-reduce/multiple-function.mlir
similarity index 90%
rename from mlir/test/mlir-reduce/reduction-tree-pass.mlir
rename to mlir/test/mlir-reduce/multiple-function.mlir
index dc04a626d1915a..d225df8b8676d7 100644
--- a/mlir/test/mlir-reduce/reduction-tree-pass.mlir
+++ b/mlir/test/mlir-reduce/multiple-function.mlir
@@ -1,5 +1,5 @@
 // UNSUPPORTED: -windows-
-// RUN: mlir-reduce %s -test %S/failure-test.sh | FileCheck %s
+// RUN: mlir-reduce %s -test %S/failure-test.sh -pass-test function-reducer | FileCheck %s
 // This input should be reduced by the pass pipeline so that only
 // the @simple5 function remains as this is the shortest function
 // containing the interesting behavior.
diff --git a/mlir/test/mlir-reduce/testcase-linux.mlir b/mlir/test/mlir-reduce/simple-test.mlir
similarity index 81%
rename from mlir/test/mlir-reduce/testcase-linux.mlir
rename to mlir/test/mlir-reduce/simple-test.mlir
index f2bb161bb5a690..5329e9552f5bb2 100644
--- a/mlir/test/mlir-reduce/testcase-linux.mlir
+++ b/mlir/test/mlir-reduce/simple-test.mlir
@@ -1,5 +1,5 @@
 // UNSUPPORTED: -windows-
-// RUN: mlir-reduce %s -test %S/test.sh
+// RUN: mlir-reduce %s -test %S/test.sh -pass-test function
 
 func @simple1(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   cond_br %arg0, ^bb1, ^bb2
@@ -10,4 +10,4 @@ func @simple1(%arg0: i1, %arg1: memref<2xf32>, %arg2: memref<2xf32>) {
   br ^bb3(%0 : memref<2xf32>)
 ^bb3(%1: memref<2xf32>):
   return
-} \ No newline at end of file
+}
diff --git a/mlir/test/mlir-reduce/test-reducer-pass.mlir b/mlir/test/mlir-reduce/single-function.mlir
similarity index 52%
rename from mlir/test/mlir-reduce/test-reducer-pass.mlir
rename to mlir/test/mlir-reduce/single-function.mlir
index da5b0c96335530..732963553e9002 100644
--- a/mlir/test/mlir-reduce/test-reducer-pass.mlir
+++ b/mlir/test/mlir-reduce/single-function.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s
-// RUN: not mlir-opt %s -test-mlir-reducer
+// RUN: not mlir-opt %s -test-mlir-reducer -pass-test function-reducer
 
 func @test() {
   "test.crashOp"() : () -> ()
diff --git a/mlir/tools/mlir-reduce/CMakeLists.txt b/mlir/tools/mlir-reduce/CMakeLists.txt
index b3a7c36a030928..f581eee21fab62 100644
--- a/mlir/tools/mlir-reduce/CMakeLists.txt
+++ b/mlir/tools/mlir-reduce/CMakeLists.txt
@@ -32,6 +32,7 @@ set(LIBS
   )
 
 add_llvm_tool(mlir-reduce
+  OptReductionPass.cpp
  Passes/FunctionReducer.cpp
  ReductionNode.cpp
  ReductionTreePass.cpp
diff --git a/mlir/tools/mlir-reduce/OptReductionPass.cpp b/mlir/tools/mlir-reduce/OptReductionPass.cpp
new file mode 100644
index 00000000000000..dbb3d97046d476
--- /dev/null
+++ b/mlir/tools/mlir-reduce/OptReductionPass.cpp
@@ -0,0 +1,55 @@
+//===- OptReductionPass.cpp - Optimization Reduction Pass Wrapper ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Opt Reduction Pass class. It creates a pass to run
+// any optimization pass within it and only replaces the output module with the
+// transformed version if it is smaller and interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Reducer/OptReductionPass.h"
+
+#define DEBUG_TYPE "mlir-reduce"
+
+using namespace mlir;
+
+OptReductionPass::OptReductionPass(const Tester *test, MLIRContext *context,
+                                   std::unique_ptr<Pass> optPass)
+    : context(context), test(test), optPass(std::move(optPass)) {}
+
+OptReductionPass::OptReductionPass(const OptReductionPass &srcPass)
+    : test(srcPass.test), optPass(srcPass.optPass.get()) {}
+
+/// Runs the pass instance in the pass pipeline.
+void OptReductionPass::runOnOperation() {
+  LLVM_DEBUG(llvm::dbgs() << "\nOptimization Reduction pass: ");
+  LLVM_DEBUG(llvm::dbgs() << optPass.get()->getName() << "\nTesting:\n");
+
+  ModuleOp module = this->getOperation();
+  ModuleOp moduleVariant = module.clone();
+  PassManager pmTransform(context);
+  pmTransform.addPass(std::move(optPass));
+
+  if (failed(pmTransform.run(moduleVariant)))
+    return;
+
+  ReductionNode original(module, nullptr);
+  original.measureAndTest(test);
+
+  ReductionNode reduced(moduleVariant, nullptr);
+  reduced.measureAndTest(test);
+
+  if (reduced.isInteresting() && reduced.getSize() < original.getSize()) {
+    ReductionTreeUtils::updateGoldenModule(module, reduced.getModule().clone());
+    LLVM_DEBUG(llvm::dbgs() << "\nSuccessful Transformed version\n\n");
+  } else {
+    LLVM_DEBUG(llvm::dbgs() << "\nUnsuccessful Transformed version\n\n");
+  }
+
+  LLVM_DEBUG(llvm::dbgs() << "Pass Complete\n\n");
+}
diff --git a/mlir/tools/mlir-reduce/mlir-reduce.cpp b/mlir/tools/mlir-reduce/mlir-reduce.cpp
index 93de0703d8927e..4c69aa0ad2172b 100644
--- a/mlir/tools/mlir-reduce/mlir-reduce.cpp
+++ b/mlir/tools/mlir-reduce/mlir-reduce.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Parser.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Reducer/OptReductionPass.h"
 #include "mlir/Reducer/ReductionNode.h"
 #include "mlir/Reducer/ReductionTreePass.h"
 #include "mlir/Reducer/Tester.h"
@@ -46,6 +47,11 @@ static llvm::cl::opt<std::string>
     llvm::cl::desc("Output filename for the reduced test case"),
     llvm::cl::init("-"));
 
+// TODO: Use PassPipelineCLParser to define pass pipelines in the command line.
+static llvm::cl::opt<std::string>
+    passTestSpecifier("pass-test",
+                      llvm::cl::desc("Indicate a specific pass to be tested"));
+
 // Parse and verify the input MLIR file.
 static LogicalResult loadModule(MLIRContext &context, OwningModuleRef &module,
                                 StringRef inputFilename) {
@@ -94,10 +100,19 @@ int main(int argc, char **argv) {
 
   // Reduction pass pipeline.
   PassManager pm(&context);
-  // Reduction tree pass with OpReducer variant generation and single path
-  // traversal.
-  pm.addPass(
-      std::make_unique<ReductionTreePass<SinglePath>>(&test));
+  if (passTestSpecifier == "DCE") {
+
+    // Opt Reduction Pass with SymbolDCEPass as opt pass.
+    pm.addPass(std::make_unique<OptReductionPass>(&test, &context,
+                                                  createSymbolDCEPass()));
+
+  } else if (passTestSpecifier == "function-reducer") {
+
+    // Reduction tree pass with OpReducer variant generation and single path
+    // traversal.
+    pm.addPass(std::make_unique<ReductionTreePass<SinglePath>>(
+        &test));
+  }

   ModuleOp m = moduleRef.get().clone();

From 501a078cbb4a79170fccf1346d772dae3d318057 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks
Date: Tue, 18 Aug 2020 09:49:05 -0700
Subject: [PATCH 063/101] Revert "[TSan][libdispatch] Add interceptors for dispatch_async_and_wait()"

This reverts commit d137db80297f286f3a19eacc63d4a980646da437.

Breaks builds on older SDKs.
---
 .../rtl/tsan_interceptors_libdispatch.cpp     |  5 ---
 .../test/tsan/libdispatch/async_and_wait.c    | 31 -------------------
 2 files changed, 36 deletions(-)
 delete mode 100644 compiler-rt/test/tsan/libdispatch/async_and_wait.c

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp
index 292ea5fbb23931..5dacd3256abc9a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_libdispatch.cpp
@@ -219,9 +219,6 @@ static void invoke_and_release_block(void *param) {
 DISPATCH_INTERCEPT(dispatch, false)
 DISPATCH_INTERCEPT(dispatch_barrier, true)

-DISPATCH_INTERCEPT_SYNC_F(dispatch_async_and_wait_f, false)
-DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false)
-
 DECLARE_REAL(void, dispatch_after_f, dispatch_time_t when,
              dispatch_queue_t queue, void *context, dispatch_function_t work)

@@ -749,8 +746,6 @@ void InitializeLibdispatchInterceptors() {
   INTERCEPT_FUNCTION(dispatch_barrier_async_f);
   INTERCEPT_FUNCTION(dispatch_barrier_sync);
   INTERCEPT_FUNCTION(dispatch_barrier_sync_f);
-  INTERCEPT_FUNCTION(dispatch_async_and_wait);
-  INTERCEPT_FUNCTION(dispatch_async_and_wait_f);
   INTERCEPT_FUNCTION(dispatch_after);
   INTERCEPT_FUNCTION(dispatch_after_f);
   INTERCEPT_FUNCTION(dispatch_once);
diff --git a/compiler-rt/test/tsan/libdispatch/async_and_wait.c b/compiler-rt/test/tsan/libdispatch/async_and_wait.c
deleted file mode 100644
index 5e63c118aef53d..00000000000000
--- a/compiler-rt/test/tsan/libdispatch/async_and_wait.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// RUN: %clang_tsan %s -o %t
-// RUN: %run %t 2>&1 | FileCheck %s --implicit-check-not='ThreadSanitizer'
-
-#include "dispatch/dispatch.h"
-
-#include <stdio.h>
-
-long global;
-
-int main() {
-  dispatch_queue_t q = dispatch_queue_create("my.queue", DISPATCH_QUEUE_SERIAL);
-  dispatch_semaphore_t s = dispatch_semaphore_create(0);
-
-  // Force queue to context switch onto separate thread.
-  dispatch_async(q, ^{
-    dispatch_semaphore_wait(s, DISPATCH_TIME_FOREVER);
-  });
-  dispatch_semaphore_signal(s);
-
-  global++;
-  dispatch_async_and_wait(q, ^{
-    // The queue continues to execute on separate thread. This would cause a
-    // race if we had used `dispatch_async()` without the `_and_wait` part.
-    global++;
-  });
-  global++;
-
-  fprintf(stderr, "Done.\n");
-}
-
-// CHECK: Done.
From 11ff5176c4655526960dd01024f5d1f99499d4ad Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 18 Aug 2020 17:08:49 +0100
Subject: [PATCH 064/101] [X86][AVX] lowerShuffleWithVPMOV - add non-VLX support.

We can efficiently handle non-VLX cases now that we have the
getAVX512TruncNode helper.
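For illustration, here is the effect on one of the cases updated in
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll below
(trunc_v8i32_to_v8i8_with_zext_return_v16i8). On an AVX512F target without
VLX, the v8i32 -> v8i8 truncation zero-extended into a v16i8 result
previously lowered as:

  vpmovdw %zmm0, %ymm0
  vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero

With this patch it uses the zmm truncating move directly, leaving the upper
elements zeroed:

  vmovdqa %ymm0, %ymm0
  vpmovdb %zmm0, %xmm0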
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 17 ++------
 llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll | 40 ++++++++-----------
 2 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ec4d236dc3ea19..2b19254c4344b6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11361,14 +11361,8 @@ static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
 //   t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21
 //   t18: v2i64 = bitcast t51
 //
-// Without avx512vl, this is lowered to:
-//
-// vpmovqd %zmm0, %ymm0
-// vpshufb {{.*#+}} xmm0 =
-// xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
-//
-// But when avx512vl is available, one can just use a single vpmovdw
-// instruction.
+// One can just use a single vpmovdw instruction; without avx512vl we need to
+// use the zmm variant and extract the lower subvector, padding with zeroes.
 // TODO: Merge with lowerShuffleAsVTRUNC.
 static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
                                      SDValue V2, ArrayRef<int> Mask,
@@ -11400,11 +11394,6 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
   SDValue Src = V1.getOperand(0).getOperand(0);
   MVT SrcVT = Src.getSimpleValueType();

-  // The vptrunc** instructions truncating 128 bit and 256 bit vectors
-  // are only available with avx512vl.
-  if (!SrcVT.is512BitVector() && !Subtarget.hasVLX())
-    return SDValue();
-
   // Down Convert Word to Byte is only available with avx512bw. The case with
   // 256-bit output doesn't contain a shuffle and is therefore not handled here.
   if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
@@ -11417,7 +11406,7 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
       !matchShuffleAsVPMOV(Mask, SwappedOps, 4))
     return SDValue();

-  return DAG.getNode(X86ISD::VTRUNC, DL, VT, Src);
+  return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, true);
 }

 // Attempt to match binary shuffle patterns as a truncate.
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll index 1559fdbbe72c8c..3919f326d39a53 100644 --- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll +++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll @@ -561,9 +561,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou ; ; AVX512F-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -575,9 +574,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_with_zext_return_v16i8(<8 x i32> %vec) nou ; ; AVX512BW-LABEL: trunc_v8i32_to_v8i8_with_zext_return_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -621,9 +619,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou ; ; AVX512F-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -635,9 +632,8 @@ define <16 x i8> @trunc_v8i32_to_v8i8_via_v8i16_return_v16i8(<8 x i32> %vec) nou ; ; AVX512BW-LABEL: trunc_v8i32_to_v8i8_via_v8i16_return_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -816,9 +812,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no ; ; AVX512F-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -830,9 +825,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_with_zext_return_v8i16(<4 x i64> %vec) no ; ; AVX512BW-LABEL: trunc_v4i64_to_v4i16_with_zext_return_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -881,9 +875,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no ; ; AVX512F-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16: ; AVX512F: # %bb.0: -; 
AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -895,9 +888,8 @@ define <8 x i16> @trunc_v4i64_to_v4i16_via_v4i32_return_v8i16(<4 x i64> %vec) no ; ; AVX512BW-LABEL: trunc_v4i64_to_v4i16_via_v4i32_return_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vmovdqa %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; From a1caa302970de86b15d360212b526be8f1d59641 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 14 Aug 2020 17:09:23 -0700 Subject: [PATCH 065/101] [gn build] Add support for expensive checks Reviewed By: hans, MaskRay Differential Revision: https://reviews.llvm.org/D86007 --- llvm/utils/gn/build/BUILD.gn | 4 ++++ llvm/utils/gn/build/buildflags.gni | 3 +++ llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn | 8 +++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index e29cdb678a361f..3c0b905991b50f 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -34,6 +34,10 @@ config("compiler_defaults") { defines += [ "NDEBUG" ] } + if (llvm_enable_expensive_checks) { + defines += [ "EXPENSIVE_CHECKS" ] + } + asmflags = target_flags cflags = target_flags ldflags = target_flags + target_ldflags diff --git a/llvm/utils/gn/build/buildflags.gni b/llvm/utils/gn/build/buildflags.gni index 4dcdc962b7d116..eb8ac55e48e01b 100644 --- a/llvm/utils/gn/build/buildflags.gni +++ b/llvm/utils/gn/build/buildflags.gni @@ -10,4 +10,7 @@ declare_args() { # Whether to enable assertions. llvm_enable_assertions = true + + # Whether to enable expensive checks. 
+ llvm_enable_expensive_checks = false } diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index 5f8058699d7293..32480e51a4c270 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -1,6 +1,7 @@ import("//compiler-rt/target.gni") import("//compiler-rt/test/test.gni") import("//llvm/triples.gni") +import("//llvm/utils/gn/build/buildflags.gni") import("//llvm/utils/gn/build/libs/zlib/enable.gni") import("//llvm/utils/gn/build/toolchain/compiler.gni") import("//llvm/utils/gn/build/write_cmake_config.gni") @@ -51,12 +52,17 @@ write_cmake_config("lit_common_configured") { "SANITIZER_CAN_USE_CXXABI_PYBOOL=True", "COMPILER_RT_HAS_LLD_PYBOOL=True", "COMPILER_RT_HAS_GWP_ASAN_PYBOOL=False", - "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=False", "HAVE_RPC_XDR_H=0", "ANDROID_NDK_VERSION=19", "ANDROID_SERIAL_FOR_TESTING=$android_serial_for_testing", ] + if (llvm_enable_expensive_checks) { + values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=True" ] + } else { + values += [ "LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL=False" ] + } + if (host_cpu == "x64") { values += [ "HOST_ARCH=x86_64" ] } else { From 55565752306e352e655bf8a4ba919c14d6b195c2 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 13 Aug 2020 14:59:58 -0700 Subject: [PATCH 066/101] Added std.floor operation to match std.ceil There should be an equivalent std.floor op to std.ceil. This includes matching lowerings for SPIRV, NVVM, ROCDL, and LLVM. Reviewed By: ftynse Differential Revision: https://reviews.llvm.org/D85940 --- .../mlir/Dialect/StandardOps/IR/Ops.td | 33 +++++++++++++++++++ .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 5 ++- .../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 5 ++- .../StandardToLLVM/StandardToLLVM.cpp | 2 ++ .../ConvertStandardToSPIRV.cpp | 1 + .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 15 +++++++++ .../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 15 +++++++++ .../StandardToLLVM/standard-to-llvm.mlir | 21 ++++++++++++ .../StandardToSPIRV/std-ops-to-spirv.mlir | 2 ++ mlir/test/IR/core-ops.mlir | 12 +++++++ 10 files changed, 109 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 088f262790d6c3..510d485d019f18 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -814,6 +814,39 @@ def CeilFOp : FloatUnaryOp<"ceilf"> { }]; } +//===----------------------------------------------------------------------===// +// FloorFOp +//===----------------------------------------------------------------------===// + +def FloorFOp : FloatUnaryOp<"floorf"> { + let summary = "floor of the specified value"; + let description = [{ + Syntax: + + ``` + operation ::= ssa-id `=` `std.floorf` ssa-use `:` type + ``` + + The `floorf` operation computes the floor of a given value. It takes one + operand and returns one result of the same type. This type may be a float + scalar type, a vector whose element type is float, or a tensor of floats. + It has no standard attributes. + + Example: + + ```mlir + // Scalar floor value. + %a = floorf %b : f64 + + // SIMD vector element-wise floor value. + %f = floorf %g : vector<4xf32> + + // Tensor element-wise floor value. 
+ %x = floorf %y : tensor<4x?xf8> + ``` + }]; +} + //===----------------------------------------------------------------------===// // CmpFOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 76c166842c2d95..d11cc51d1d594f 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -137,7 +137,8 @@ struct LowerGpuOpsToNVVMOpsPass LLVMConversionTarget target(getContext()); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op, + LLVM::Log2Op>(); target.addIllegalOp(); target.addLegalDialect(); // TODO: Remove once we support replacing non-root ops. @@ -174,6 +175,8 @@ void mlir::populateGpuToNVVMConversionPatterns( "__nv_cos"); patterns.insert>(converter, "__nv_expf", "__nv_exp"); + patterns.insert>(converter, "__nv_floorf", + "__nv_floor"); patterns.insert>(converter, "__nv_logf", "__nv_log"); patterns.insert>(converter, "__nv_log10f", diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp index 697f8078e725da..40cf097c9c5a9e 100644 --- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp +++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp @@ -71,7 +71,8 @@ struct LowerGpuOpsToROCDLOpsPass LLVMConversionTarget target(getContext()); target.addIllegalDialect(); target.addIllegalOp(); + LLVM::FFloorOp, LLVM::LogOp, LLVM::Log10Op, + LLVM::Log2Op>(); target.addIllegalOp(); target.addLegalDialect(); // TODO: Remove once we support replacing non-root ops. @@ -104,6 +105,8 @@ void mlir::populateGpuToROCDLConversionPatterns( "__ocml_cos_f64"); patterns.insert>(converter, "__ocml_exp_f32", "__ocml_exp_f64"); + patterns.insert>(converter, "__ocml_floor_f32", + "__ocml_floor_f64"); patterns.insert>(converter, "__ocml_log_f32", "__ocml_log_f64"); patterns.insert>(converter, "__ocml_log10_f32", diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index 4a061963fce3aa..0ee1166b1a643b 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -1418,6 +1418,7 @@ using CosOpLowering = VectorConvertToLLVMPattern; using DivFOpLowering = VectorConvertToLLVMPattern; using ExpOpLowering = VectorConvertToLLVMPattern; using Exp2OpLowering = VectorConvertToLLVMPattern; +using FloorFOpLowering = VectorConvertToLLVMPattern; using Log10OpLowering = VectorConvertToLLVMPattern; using Log2OpLowering = VectorConvertToLLVMPattern; using LogOpLowering = VectorConvertToLLVMPattern; @@ -3285,6 +3286,7 @@ void mlir::populateStdToLLVMNonMemoryConversionPatterns( DivFOpLowering, ExpOpLowering, Exp2OpLowering, + FloorFOpLowering, GenericAtomicRMWOpLowering, LogOpLowering, Log10OpLowering, diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp index 268139faa2fdde..6ae17c33070cfa 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -1076,6 +1076,7 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, + UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, 
UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index df38df1749cbc0..6b071a053ce385 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -172,6 +172,21 @@ gpu.module @test_module { // ----- +gpu.module @test_module { + // CHECK: llvm.func @__nv_floorf(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__nv_floor(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_floor + func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = std.floorf %arg_f32 : f32 + // CHECK: llvm.call @__nv_floorf(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.floorf %arg_f64 : f64 + // CHECK: llvm.call @__nv_floor(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return %result32, %result64 : f32, f64 + } +} + +// ----- + gpu.module @test_module { // CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float // CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir index a7565bb6e323f0..b17d75fd7afb0b 100644 --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -114,6 +114,21 @@ gpu.module @test_module { // ----- +gpu.module @test_module { + // CHECK: llvm.func @__ocml_floor_f32(!llvm.float) -> !llvm.float + // CHECK: llvm.func @__ocml_floor_f64(!llvm.double) -> !llvm.double + // CHECK-LABEL: func @gpu_floor + func @gpu_floor(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = std.floorf %arg_f32 : f32 + // CHECK: llvm.call @__ocml_floor_f32(%{{.*}}) : (!llvm.float) -> !llvm.float + %result64 = std.floorf %arg_f64 : f64 + // CHECK: llvm.call @__ocml_floor_f64(%{{.*}}) : (!llvm.double) -> !llvm.double + std.return %result32, %result64 : f32, f64 + } +} + +// ----- + gpu.module @test_module { // CHECK: llvm.func @__ocml_cos_f32(!llvm.float) -> !llvm.float // CHECK: llvm.func @__ocml_cos_f64(!llvm.double) -> !llvm.double diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir index c55950a556344f..c7363085817e1e 100644 --- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir +++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir @@ -40,6 +40,27 @@ func @sine(%arg0 : f32) { // ----- +// CHECK-LABEL: func @ceilf( +// CHECK-SAME: !llvm.float +func @ceilf(%arg0 : f32) { + // CHECK: "llvm.intr.ceil"(%arg0) : (!llvm.float) -> !llvm.float + %0 = ceilf %arg0 : f32 + std.return +} + +// ----- + +// CHECK-LABEL: func @floorf( +// CHECK-SAME: !llvm.float +func @floorf(%arg0 : f32) { + // CHECK: "llvm.intr.floor"(%arg0) : (!llvm.float) -> !llvm.float + %0 = floorf %arg0 : f32 + std.return +} + +// ----- + + // CHECK-LABEL: func @rsqrt_double( // CHECK-SAME: !llvm.double func @rsqrt_double(%arg0 : f64) { diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index e85f78f757a3a3..1b83af1be7551e 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -65,6 +65,8 @@ func @float32_unary_scalar(%arg0: f32) { %8 = tanh %arg0 : f32 // CHECK: spv.GLSL.Sin %{{.*}}: f32 %9 = sin %arg0 : f32 + // CHECK: spv.GLSL.Floor %{{.*}}: f32 + %10 = floorf %arg0 : f32 return } diff --git 
a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 74470719047791..69e974bc41734d 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -554,6 +554,18 @@ func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { // CHECK: = fptosi {{.*}} : f16 to i64 %162 = fptosi %half : f16 to i64 + // CHECK: floorf %arg1 : f32 + %163 = "std.floorf"(%f) : (f32) -> f32 + + // CHECK: %{{[0-9]+}} = floorf %arg1 : f32 + %164 = floorf %f : f32 + + // CHECK: %{{[0-9]+}} = floorf %cst_8 : vector<4xf32> + %165 = floorf %vcf32 : vector<4xf32> + + // CHECK: %{{[0-9]+}} = floorf %arg0 : tensor<4x4x?xf32> + %166 = floorf %t : tensor<4x4x?xf32> + return } From 40e269ea6db9c755c27e2ee1e201a640ac085afd Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Fri, 14 Aug 2020 01:58:00 -0700 Subject: [PATCH 067/101] [GlobalISel] Add a combine for ashr(shl x, c), c --> sext_inreg x, c' By detecting this sign extend pattern early, we can uncover opportunities for more optimizations. Differential Revision: https://reviews.llvm.org/D85965 --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 6 ++ .../llvm/CodeGen/GlobalISel/MIPatternMatch.h | 6 ++ .../include/llvm/Target/GlobalISel/Combine.td | 12 ++- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 30 +++++++ ...galizercombiner-ashr-shl-to-sext-inreg.mir | 90 +++++++++++++++++++ .../AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll | 24 ++--- .../AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll | 35 ++++---- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 9 +- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 9 +- 9 files changed, 174 insertions(+), 47 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index e632f5fd05ec2b..e5f2700f6de9a2 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -294,6 +294,12 @@ class CombinerHelper { bool applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo); + /// Match ashr (shl x, C), C -> sext_inreg (C) + bool matchAshrShlToSextInreg(MachineInstr &MI, + std::tuple &MatchInfo); + bool applyAshShlToSextInreg(MachineInstr &MI, + std::tuple &MatchInfo); + /// Try to transform \p MI by using all of the above /// combine functions. Returns true if changed. 
bool tryCombine(MachineInstr &MI); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h index 043be086ff417d..4e216a284088bd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h @@ -251,6 +251,12 @@ m_GLShr(const LHS &L, const RHS &R) { return BinaryOp_match(L, R); } +template +inline BinaryOp_match +m_GAShr(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + // Helper for unary instructions (G_[ZSA]EXT/G_TRUNC) etc template struct UnaryOp_match { SrcTy L; diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 9cb45e2bfc117e..4647afad418505 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -284,6 +284,15 @@ def hoist_logic_op_with_same_opcode_hands: GICombineRule < (apply [{ return Helper.applyBuildInstructionSteps(*${root}, ${info});}]) >; +// Fold ashr (shl x, C), C -> sext_inreg (C) +def shl_ashr_to_sext_inreg_matchinfo : GIDefMatchData<"std::tuple">; +def shl_ashr_to_sext_inreg : GICombineRule< + (defs root:$root, shl_ashr_to_sext_inreg_matchinfo:$info), + (match (wip_match_opcode G_ASHR): $root, + [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]), + (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}]) +>; + // FIXME: These should use the custom predicate feature once it lands. def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -301,4 +310,5 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl]>; def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store, undef_combines, identity_combines, simplify_add_to_sub, - hoist_logic_op_with_same_opcode_hands]>; + hoist_logic_op_with_same_opcode_hands, + shl_ashr_to_sext_inreg]>; diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index b922f6988a2c23..48294a07597f8a 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1887,6 +1887,36 @@ bool CombinerHelper::applyBuildInstructionSteps( return true; } +bool CombinerHelper::matchAshrShlToSextInreg( + MachineInstr &MI, std::tuple &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + int64_t ShlCst, AshrCst; + Register Src; + // FIXME: detect splat constant vectors. 
+ if (!mi_match(MI.getOperand(0).getReg(), MRI, + m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst)))) + return false; + if (ShlCst != AshrCst) + return false; + if (!isLegalOrBeforeLegalizer( + {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}})) + return false; + MatchInfo = {Src, ShlCst}; + return true; +} +bool CombinerHelper::applyAshShlToSextInreg( + MachineInstr &MI, std::tuple &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_ASHR); + Register Src; + int64_t ShiftAmt; + std::tie(Src, ShiftAmt) = MatchInfo; + unsigned Size = MRI.getType(Src).getScalarSizeInBits(); + Builder.setInstrAndDebugLoc(MI); + Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir new file mode 100644 index 00000000000000..14bda863d2c289 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-ashr-shl-to-sext-inreg.mir @@ -0,0 +1,90 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: ashr_shl_to_sext_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: ashr_shl_to_sext_inreg + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[TRUNC]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %1(s32) + %2:_(s16) = G_CONSTANT i16 8 + %3:_(s16) = G_SHL %0, %2(s16) + %4:_(s16) = exact G_ASHR %3, %2(s16) + %5:_(s32) = G_ANYEXT %4(s16) + $w0 = COPY %5(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: different_shift_amts +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: different_shift_amts + ; CHECK: liveins: $w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 + ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = exact G_ASHR [[SHL]], [[C1]](s16) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %1:_(s32) = COPY $w0 + %0:_(s16) = G_TRUNC %1(s32) + %2:_(s16) = G_CONSTANT i16 12 + %4:_(s16) = G_CONSTANT i16 8 + %3:_(s16) = G_SHL %0, %2(s16) + %5:_(s16) = exact G_ASHR %3, %4(s16) + %6:_(s32) = G_ANYEXT %5(s16) + $w0 = COPY %6(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: ashr_shl_to_sext_inreg_vector +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } +body: | + bb.1: + liveins: $d0 + ; Currently don't support this for vectors just yet, this will need updating + ; when we do. 
+ ; CHECK-LABEL: name: ashr_shl_to_sext_inreg_vector + ; CHECK: liveins: $d0 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: [[ASHR:%[0-9]+]]:_(<4 x s16>) = exact G_ASHR [[SHL]], [[BUILD_VECTOR]](<4 x s16>) + ; CHECK: $d0 = COPY [[ASHR]](<4 x s16>) + ; CHECK: RET_ReallyLR implicit $d0 + %0:_(<4 x s16>) = COPY $d0 + %2:_(s16) = G_CONSTANT i16 8 + %1:_(<4 x s16>) = G_BUILD_VECTOR %2(s16), %2(s16), %2(s16), %2(s16) + %3:_(<4 x s16>) = G_SHL %0, %1(<4 x s16>) + %4:_(<4 x s16>) = exact G_ASHR %3, %1(<4 x s16>) + $d0 = COPY %4(<4 x s16>) + RET_ReallyLR implicit $d0 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index e5d26476e94248..f3a53fb7d22d50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -674,8 +674,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 -; GFX6-NEXT: s_lshl_b32 s0, s0, 8 -; GFX6-NEXT: s_ashr_i32 s0, s0, 8 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x180000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -830,8 +829,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s2, s2, s0 ; GFX6-NEXT: s_bfe_i32 s0, s2, 0x80000 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 +; GFX6-NEXT: s_sext_i32_i8 s0, s0 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -854,8 +852,7 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s2, s2, s0 ; GFX6-NEXT: s_bfe_i32 s0, s2, 8 -; GFX6-NEXT: s_lshl_b32 s0, s0, 24 -; GFX6-NEXT: s_ashr_i32 s0, s0, 24 +; GFX6-NEXT: s_sext_i32_i8 s0, s0 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm @@ -879,8 +876,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add ; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 +; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm %load = load i8, i8 addrspace(1)* %ptr, align 1 @@ -904,8 +900,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a ; GFX6-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: v_bfe_i32 v0, v0, 8, 0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX6-NEXT: v_ashrrev_i32_e32 v0, 24, v0 +; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm %load = load i8, i8 addrspace(1)* %ptr, align 1 @@ -927,8 +922,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: 
s_lshl_b32 s0, s0, 31 -; GFX6-NEXT: s_ashr_i32 s0, s0, 31 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -951,8 +945,7 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 30 -; GFX6-NEXT: s_ashr_i32 s0, s0, 30 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10001 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -975,8 +968,7 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 30 -; GFX6-NEXT: s_ashr_i32 s0, s0, 30 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 ; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20001 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index ab3fbc03e81d57..a8098b7dd9d159 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -423,8 +423,7 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshl_b32 s0, s0, 31 -; GFX6-NEXT: s_ashr_i32 s0, s0, 31 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x10000 ; GFX6-NEXT: v_mov_b32_e32 v0, s0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 @@ -950,22 +949,22 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd -; GFX6-NEXT: s_mov_b32 s6, -1 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_and_b32 s0, s0, 63 -; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 -; GFX6-NEXT: v_mov_b32_e32 v1, s1 -; GFX6-NEXT: v_mov_b32_e32 v0, s0 -; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 -; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 -; GFX6-NEXT: s_endpgm +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_mov_b64 s[10:11], s[6:7] +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_load_dword s0, s[0:1], 0x0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_and_b32 s0, s0, 63 +; GFX6-NEXT: s_bfe_u32 s1, s0, 0x20002 +; GFX6-NEXT: v_mov_b32_e32 v1, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s0 +; GFX6-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; GFX6-NEXT: buffer_store_dword v0, off, s[8:11], 0 +; GFX6-NEXT: s_endpgm i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 { %src = load i32, i32 addrspace(1)* %in, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index f6565fe1b6e24a..db9e75dd582c82 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -3415,8 +3415,7 @@ define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; CGP-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_bfe_i32 v0, v0, 0, 25
 ; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %num.mask = and i64 %num, 16777215
@@ -3736,10 +3735,8 @@ define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, |v3|
 ; CGP-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT:    v_lshlrev_b32_e32 v2, 7, v2
-; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT:    v_bfe_i32 v0, v0, 0, 25
+; CGP-NEXT:    v_bfe_i32 v2, v2, 0, 25
 ; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 06d46321a59b61..7f55c735859753 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -3363,8 +3363,7 @@ define i64 @v_srem_i64_24bit(i64 %num, i64 %den) {
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
 ; CGP-NEXT:    v_mul_lo_u32 v1, v2, v1
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
-; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
-; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
+; CGP-NEXT:    v_bfe_i32 v0, v0, 0, 25
 ; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %num.mask = and i64 %num, 16777215
@@ -3677,20 +3676,18 @@ define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_rcp_f32_e32 v5, v4
 ; CGP-NEXT:    v_ashrrev_i32_e32 v6, 30, v6
 ; CGP-NEXT:    v_or_b32_e32 v6, 1, v6
-; CGP-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
+; CGP-NEXT:    v_bfe_i32 v0, v0, 0, 25
 ; CGP-NEXT:    v_mul_f32_e32 v5, v1, v5
 ; CGP-NEXT:    v_trunc_f32_e32 v5, v5
 ; CGP-NEXT:    v_mad_f32 v1, -v5, v4, v1
 ; CGP-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; CGP-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v4|
 ; CGP-NEXT:    v_cndmask_b32_e32 v1, 0, v6, vcc
-; CGP-NEXT:    v_ashrrev_i32_e32 v0, 7, v0
 ; CGP-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
 ; CGP-NEXT:    v_mul_lo_u32 v3, v1, v3
 ; CGP-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
-; CGP-NEXT:    v_lshlrev_b32_e32 v2, 7, v2
-; CGP-NEXT:    v_ashrrev_i32_e32 v2, 7, v2
+; CGP-NEXT:    v_bfe_i32 v2, v2, 0, 25
 ; CGP-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
From 04a6ea5d77e7613a5e1398ddf2a0fcb4e1cea41c Mon Sep 17 00:00:00 2001
From: Amara Emerson
Date: Fri, 14 Aug 2020 02:00:07 -0700
Subject: [PATCH 068/101] [GlobalISel] Add a combine for sext_inreg(load x), c --> sextload x

This is restricted to single-use loads; if we fold them to sextloads, we can
find more optimal addressing modes on AArch64.

This also fixes an overload of the MachineFunction::getMachineMemOperand()
method which was incorrectly using the MF alignment instead of the MMO
alignment.
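A sketch of the transform on generic MIR, reusing the example from the
comment added to CombinerHelper.cpp below:

  %ld = G_LOAD %ptr, (load 2)
  %ext = G_SEXT_INREG %ld, 8
  ==>
  %ld = G_SEXTLOAD %ptr (load 1)

The load is narrowed when the extension width is smaller than the memory
width, and non-simple (atomic or volatile) loads are left alone.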
Differential Revision: https://reviews.llvm.org/D85966
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   4 +
 .../include/llvm/Target/GlobalISel/Combine.td |   9 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  62 +++++++++++
 llvm/lib/CodeGen/MachineFunction.cpp          |   2 +-
 ...alizercombiner-sextload-from-sextinreg.mir | 103 ++++++++++++++++++
 5 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index e5f2700f6de9a2..61af8cd15f11de 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -135,6 +135,10 @@ class CombinerHelper {
   bool matchSextTruncSextLoad(MachineInstr &MI);
   bool applySextTruncSextLoad(MachineInstr &MI);

+  /// Match sext_inreg(load p), imm -> sextload p
+  bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+  bool applySextInRegOfLoad(MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo);
+
   bool matchElideBrByInvertingCond(MachineInstr &MI);
   void applyElideBrByInvertingCond(MachineInstr &MI);
   bool tryElideBrByInvertingCond(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 4647afad418505..2e85c6064a127c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -132,6 +132,13 @@ def sext_trunc_sextload : GICombineRule<
          [{ return Helper.matchSextTruncSextLoad(*${d}); }]),
   (apply [{ Helper.applySextTruncSextLoad(*${d}); }])>;

+def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">;
+def sext_inreg_of_load : GICombineRule<
+  (defs root:$root, sext_inreg_of_load_matchdata:$matchinfo),
+  (match (wip_match_opcode G_SEXT_INREG):$root,
+         [{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
+
 def combine_indexed_load_store : GICombineRule<
   (defs root:$root, indexed_load_store_matchdata:$matchinfo),
   (match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -311,4 +318,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     combines_for_extload, combine_indexed_load_store, undef_combines,
     identity_combines, simplify_add_to_sub,
     hoist_logic_op_with_same_opcode_hands,
-    shl_ashr_to_sext_inreg]>;
+    shl_ashr_to_sext_inreg, sext_inreg_of_load]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 48294a07597f8a..588a24e1dc57f3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -614,6 +615,67 @@ bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
   return true;
 }

+bool CombinerHelper::matchSextInRegOfLoad(
+    MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+  // Only supports scalars for now.
+ if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + return false; + + Register SrcReg = MI.getOperand(1).getReg(); + MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); + if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg())) + return false; + + // If the sign extend extends from a narrower width than the load's width, + // then we can narrow the load width when we combine to a G_SEXTLOAD. + auto &MMO = **LoadDef->memoperands_begin(); + // Don't do this for non-simple loads. + if (MMO.isAtomic() || MMO.isVolatile()) + return false; + + // Avoid widening the load at all. + unsigned NewSizeBits = + std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits()); + + // Don't generate G_SEXTLOADs with a < 1 byte width. + if (NewSizeBits < 8) + return false; + // Don't bother creating a non-power-2 sextload, it will likely be broken up + // anyway for most targets. + if (!isPowerOf2_32(NewSizeBits)) + return false; + MatchInfo = {LoadDef->getOperand(0).getReg(), NewSizeBits}; + return true; +} + +bool CombinerHelper::applySextInRegOfLoad( + MachineInstr &MI, std::tuple &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG); + Register LoadReg; + unsigned ScalarSizeBits; + std::tie(LoadReg, ScalarSizeBits) = MatchInfo; + auto *LoadDef = MRI.getVRegDef(LoadReg); + assert(LoadDef && "Expected a load reg"); + + // If we have the following: + // %ld = G_LOAD %ptr, (load 2) + // %ext = G_SEXT_INREG %ld, 8 + // ==> + // %ld = G_SEXTLOAD %ptr (load 1) + + auto &MMO = **LoadDef->memoperands_begin(); + Builder.setInstrAndDebugLoc(MI); + auto &MF = Builder.getMF(); + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8); + Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), + LoadDef->getOperand(1).getReg(), *NewMMO); + MI.eraseFromParent(); + return true; +} + bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base, Register &Offset) { auto &MF = *MI.getParent()->getParent(); diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 464f71a4fd5397..abf47847814f20 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -477,7 +477,7 @@ MachineMemOperand *MachineFunction::getMachineMemOperand( MachineMemOperand *MachineFunction::getMachineMemOperand( const MachineMemOperand *MMO, MachinePointerInfo &PtrInfo, uint64_t Size) { return new (Allocator) MachineMemOperand( - PtrInfo, MMO->getFlags(), Size, Alignment, AAMDNodes(), nullptr, + PtrInfo, MMO->getFlags(), Size, MMO->getBaseAlign(), AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir new file mode 100644 index 00000000000000..a216c5b74b3561 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-sextload-from-sextinreg.mir @@ -0,0 +1,103 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: sextload_from_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: sextload_from_inreg + ; CHECK: liveins: $x0 + ; CHECK: 
[[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[SEXTLOAD:%[0-9]+]]:_(s16) = G_SEXTLOAD [[COPY]](p0) :: (load 1, align 2) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXTLOAD]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (load 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: non_pow_2_inreg +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: non_pow_2_inreg + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 4) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 24 + ; CHECK: $w0 = COPY [[SEXT_INREG]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s32) = G_LOAD %0(p0) :: (load 4) + %2:_(s32) = G_SEXT_INREG %1, 24 + $w0 = COPY %2(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: atomic +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: atomic + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (load acquire 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: volatile +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } +body: | + bb.1: + liveins: $x0 + + ; CHECK-LABEL: name: volatile + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (volatile load 2) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s16) = G_SEXT_INREG [[LOAD]], 8 + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SEXT_INREG]](s16) + ; CHECK: $w0 = COPY [[ANYEXT]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(p0) = COPY $x0 + %1:_(s16) = G_LOAD %0(p0) :: (volatile load 2) + %2:_(s16) = G_SEXT_INREG %1, 8 + %3:_(s32) = G_ANYEXT %2(s16) + $w0 = COPY %3(s32) + RET_ReallyLR implicit $w0 + +... From a3538b83943f640865b92e947da0d5ef5bdc930b Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Tue, 18 Aug 2020 10:47:52 -0700 Subject: [PATCH 069/101] [flang] Improve error messages for procedures in expressions When a procedure name was used on the RHS of an assignment we were not reporting the error. When one was used in an expression the error message wasn't very good (e.g. "Operands of + must be numeric; have INTEGER(4) and untyped"). Detect these cases in ArgumentAnalyzer and emit better messages, depending on whether the named procedure is a function or subroutine. Procedure names may appear as actual arguments to function and subroutine calls so don't report errors in those cases. That is the same case where assumed type arguments are allowed, so rename `isAssumedType_` to `isProcedureCall_` and use that to decide if it is an error. 
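For example (these lines mirror the new tests added to
flang/test/Semantics/assign04.f90 below):

  intrinsic :: sin
  real :: a
  !ERROR: Function call must have argument list
  a = sin
  !ERROR: Subroutine name is not allowed here
  a = s11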
Differential Revision: https://reviews.llvm.org/D86107 --- flang/include/flang/Evaluate/tools.h | 1 + flang/lib/Evaluate/tools.cpp | 4 ++++ flang/lib/Semantics/expression.cpp | 25 +++++++++++++++++-------- flang/test/Semantics/assign04.f90 | 9 +++++++++ flang/test/Semantics/resolve63.f90 | 5 +++++ 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 081795208b136a..98d4a516054eea 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -813,6 +813,7 @@ template bool IsAllocatableOrPointer(const A &x) { // Procedure and pointer detection predicates bool IsProcedure(const Expr &); +bool IsFunction(const Expr &); bool IsProcedurePointer(const Expr &); bool IsNullPointer(const Expr &); diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 6cc411f22adb05..e9089f56aa46a5 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -703,6 +703,10 @@ bool IsAssumedRank(const ActualArgument &arg) { bool IsProcedure(const Expr &expr) { return std::holds_alternative(expr.u); } +bool IsFunction(const Expr &expr) { + const auto *designator{std::get_if(&expr.u)}; + return designator && designator->GetType().has_value(); +} bool IsProcedurePointer(const Expr &expr) { return std::visit(common::visitors{ diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 9b6531cdbd6dfb..cfb908179c3a93 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -98,11 +98,10 @@ static std::optional AnalyzeTypeSpec( class ArgumentAnalyzer { public: explicit ArgumentAnalyzer(ExpressionAnalyzer &context) - : context_{context}, allowAssumedType_{false} {} + : context_{context}, isProcedureCall_{false} {} ArgumentAnalyzer(ExpressionAnalyzer &context, parser::CharBlock source, - bool allowAssumedType = false) - : context_{context}, source_{source}, allowAssumedType_{ - allowAssumedType} {} + bool isProcedureCall = false) + : context_{context}, source_{source}, isProcedureCall_{isProcedureCall} {} bool fatalErrors() const { return fatalErrors_; } ActualArguments &&GetActuals() { CHECK(!fatalErrors_); @@ -167,7 +166,7 @@ class ArgumentAnalyzer { ActualArguments actuals_; parser::CharBlock source_; bool fatalErrors_{false}; - const bool allowAssumedType_; + const bool isProcedureCall_; // false for user-defined op or assignment const Symbol *sawDefinedOp_{nullptr}; }; @@ -2003,7 +2002,7 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::FunctionReference &funcRef, std::optional *structureConstructor) { const parser::Call &call{funcRef.v}; auto restorer{GetContextualMessages().SetLocation(call.source)}; - ArgumentAnalyzer analyzer{*this, call.source, true /* allowAssumedType */}; + ArgumentAnalyzer analyzer{*this, call.source, true /* isProcedureCall */}; for (const auto &arg : std::get>(call.t)) { analyzer.Analyze(arg, false /* not subroutine call */); } @@ -2042,7 +2041,7 @@ MaybeExpr ExpressionAnalyzer::Analyze(const parser::FunctionReference &funcRef, void ExpressionAnalyzer::Analyze(const parser::CallStmt &callStmt) { const parser::Call &call{callStmt.v}; auto restorer{GetContextualMessages().SetLocation(call.source)}; - ArgumentAnalyzer analyzer{*this, call.source, true /* allowAssumedType */}; + ArgumentAnalyzer analyzer{*this, call.source, true /* isProcedureCall */}; const auto &actualArgList{std::get>(call.t)}; for (const auto &arg : actualArgList) { analyzer.Analyze(arg, true 
/* is subroutine call */); @@ -2982,7 +2981,7 @@ std::optional ArgumentAnalyzer::AnalyzeExpr( source_.ExtendToCover(expr.source); if (const Symbol * assumedTypeDummy{AssumedTypeDummy(expr)}) { expr.typedExpr.Reset(new GenericExprWrapper{}, GenericExprWrapper::Deleter); - if (allowAssumedType_) { + if (isProcedureCall_) { return ActualArgument{ActualArgument::AssumedType{*assumedTypeDummy}}; } else { context_.SayAt(expr.source, @@ -2990,6 +2989,16 @@ std::optional ArgumentAnalyzer::AnalyzeExpr( return std::nullopt; } } else if (MaybeExpr argExpr{context_.Analyze(expr)}) { + if (!isProcedureCall_ && IsProcedure(*argExpr)) { + if (IsFunction(*argExpr)) { + context_.SayAt( + expr.source, "Function call must have argument list"_err_en_US); + } else { + context_.SayAt( + expr.source, "Subroutine name is not allowed here"_err_en_US); + } + return std::nullopt; + } return ActualArgument{context_.Fold(std::move(*argExpr))}; } else { return std::nullopt; diff --git a/flang/test/Semantics/assign04.f90 b/flang/test/Semantics/assign04.f90 index 99f4901b205016..fb47f6dceab966 100644 --- a/flang/test/Semantics/assign04.f90 +++ b/flang/test/Semantics/assign04.f90 @@ -132,3 +132,12 @@ subroutine s10(a, n) real a(n) a(1:n) = 0.0 ! should not get a second error here end + +subroutine s11 + intrinsic :: sin + real :: a + !ERROR: Function call must have argument list + a = sin + !ERROR: Subroutine name is not allowed here + a = s11 +end diff --git a/flang/test/Semantics/resolve63.f90 b/flang/test/Semantics/resolve63.f90 index bd4e1d14e195a2..141945a262276d 100644 --- a/flang/test/Semantics/resolve63.f90 +++ b/flang/test/Semantics/resolve63.f90 @@ -104,6 +104,7 @@ subroutine test_conformability(x, y) ! Invalid operand types when user-defined operator is not available module m2 + intrinsic :: sin type :: t end type type(t) :: x, y @@ -113,6 +114,10 @@ module m2 subroutine test_relational() !ERROR: Operands of .EQ. must have comparable types; have TYPE(t) and REAL(4) l = x == r + !ERROR: Subroutine name is not allowed here + l = r == test_numeric + !ERROR: Function call must have argument list + l = r == sin end subroutine test_numeric() !ERROR: Operands of + must be numeric; have REAL(4) and TYPE(t) From f7a49d2aa691266497c4baa35f29ba0167b39d23 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Fri, 14 Aug 2020 07:56:29 -0700 Subject: [PATCH 070/101] [WIP][DebugInfo] Lazily parse debug_loclist offsets Parsing DWARFv5 debug_loclist offsets when a CU is parsed is weighing down memory usage of symbolizers that don't need to parse this data at all. There's not much benefit to caching these anyway - since they are O(1) lookup and reading once you know where the offset list starts (and can do bounds checking with the offset list size too). In general, I think it might be time to start paying down some of the technical debt of loc/loclist/range/rnglist parsing to try to unify it a bit more. eg: * Currently DWARFUnit has: RangeSection, RangeSectionBase, LocSection, LocSectionBase, LocTable, RngListTable, LoclistTableHeader (be nice if these were all wrapped up in two variables - one for loclists, one for rnglists) * rnglists and loclists are handled differently (see: LoclistTableHeader, but no RnglistTableHeader) * maybe all these types could be less stateful - lazily parse what they need to, even reparsing rather than caching because it doesn't seem too expensive, for instance. 
From f7a49d2aa691266497c4baa35f29ba0167b39d23 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Fri, 14 Aug 2020 07:56:29 -0700
Subject: [PATCH 070/101] [WIP][DebugInfo] Lazily parse debug_loclist offsets

Parsing DWARFv5 debug_loclist offsets when a CU is parsed is weighing
down memory usage of symbolizers that don't need to parse this data at
all. There's not much benefit to caching these anyway, since lookup and
reading are O(1) once you know where the offset list starts (and the
offset list size allows bounds checking too).

In general, I think it might be time to start paying down some of the
technical debt of loc/loclist/range/rnglist parsing to try to unify it
a bit more. eg:

* Currently DWARFUnit has: RangeSection, RangeSectionBase, LocSection,
  LocSectionBase, LocTable, RngListTable, LoclistTableHeader (it'd be
  nice if these were all wrapped up in two variables - one for loclists,
  one for rnglists)

* rnglists and loclists are handled differently (see: LoclistTableHeader,
  but no RnglistTableHeader)

* maybe all these types could be less stateful - lazily parse what they
  need to, even reparsing rather than caching because it doesn't seem too
  expensive, for instance. (though admittedly so long as it's constant
  cost/overhead per compilation that's probably adequate)

* Maybe implementing and using a DWARFDataExtractor that can be
  sub-ranged (so we could slice it up to just the single contribution) -
  though maybe that's not so useful because loc/ranges need to refer to
  it by absolute, not contribution-relative mechanisms

Differential Revision: https://reviews.llvm.org/D86110
---
 .../Plugins/SymbolFile/DWARF/DWARFUnit.h      |  7 +++--
 .../SymbolFile/DWARF/DW_AT_loclists_base.s    |  4 +--
 .../llvm/DebugInfo/DWARF/DWARFDebugLoc.h      |  2 ++
 .../llvm/DebugInfo/DWARF/DWARFListTable.h     | 31 ++++++++++---------
 llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h | 11 ++-----
 llvm/lib/DebugInfo/DWARF/DWARFContext.cpp     |  4 +--
 llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp   |  9 +++---
 llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp        | 16 +++++++---
 8 files changed, 46 insertions(+), 38 deletions(-)
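A minimal sketch of the lookup this switches to (illustrative only; the
names HeaderOffset, HeaderSize, Format, Data, and Index stand in for the
list-table header state visible in the diffs below, they are not a new
API):

    // DWARFv5 offset-array entries are raw 4-byte (DWARF32) or 8-byte
    // (DWARF64) values laid out back-to-back right after the fixed table
    // header, so fetching entry `Index` is one bounds check against
    // OffsetEntryCount plus one read; no per-CU vector is built or kept.
    uint64_t EntrySize = Format == dwarf::DWARF64 ? 8 : 4;
    uint64_t Pos = HeaderOffset + HeaderSize + EntrySize * Index;
    uint64_t ListOffset = Data.getUnsigned(&Pos, EntrySize);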
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
index affad286a49038..1d8236c4ed42fd 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h
@@ -237,7 +237,9 @@ class DWARFUnit : public lldb_private::UserID {
   llvm::Optional<uint64_t> GetRnglistOffset(uint32_t Index) const {
     if (!m_rnglist_table)
       return llvm::None;
-    if (llvm::Optional<uint64_t> off = m_rnglist_table->getOffsetEntry(Index))
+    if (llvm::Optional<uint64_t> off = m_rnglist_table->getOffsetEntry(
+            m_dwarf.GetDWARFContext().getOrLoadRngListsData().GetAsLLVM(),
+            Index))
       return *off + m_ranges_base;
     return llvm::None;
   }
@@ -246,7 +248,8 @@ class DWARFUnit : public lldb_private::UserID {
     if (!m_loclist_table_header)
       return llvm::None;
 
-    llvm::Optional<uint64_t> Offset = m_loclist_table_header->getOffsetEntry(Index);
+    llvm::Optional<uint64_t> Offset = m_loclist_table_header->getOffsetEntry(
+        m_dwarf.GetDWARFContext().getOrLoadLocListsData().GetAsLLVM(), Index);
     if (!Offset)
       return llvm::None;
     return *Offset + m_loclists_base;
diff --git a/lldb/test/Shell/SymbolFile/DWARF/DW_AT_loclists_base.s b/lldb/test/Shell/SymbolFile/DWARF/DW_AT_loclists_base.s
index ca32e9930a76a7..6ff35f8c659648 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/DW_AT_loclists_base.s
+++ b/lldb/test/Shell/SymbolFile/DWARF/DW_AT_loclists_base.s
@@ -5,7 +5,7 @@
 # CHECK-LABEL: image lookup -v -s lookup_loclists
 # CHECK: Variable: {{.*}}, name = "x0", type = "int", location = DW_OP_reg0 RAX,
-# CHECK: Variable: {{.*}}, name = "x1", type = "int", location = <empty>,
+# CHECK-NOT: Variable:
 
 loclists:
         nop
@@ -28,7 +28,7 @@ lookup_loclists:
         .short  5       # Version
         .byte   8       # Address size
         .byte   0       # Segment selector size
-        .long   1       # Offset entry count
+        .long   2       # Offset entry count
 .Lloclists_table_base:
         .long   .Ldebug_loc0-.Lloclists_table_base
         .long   .Ldebug_loc1-.Lloclists_table_base
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
index 3b141304f85f49..dbc11c51a7890b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h
@@ -72,6 +72,8 @@ class DWARFLocationTable {
       std::function<Optional<object::SectionedAddress>(uint32_t)> LookupAddr,
       function_ref<bool(Expected<DWARFLocationExpression>)> Callback) const;
 
+  const DWARFDataExtractor &getData() { return Data; }
+
 protected:
   DWARFDataExtractor Data;
 
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
index 496fdb2477f9dc..bcfc71381aeee8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFListTable.h
@@ -72,10 +72,6 @@ class DWARFListTableHeader {
   };
   Header HeaderData;
-  /// The offset table, which contains offsets to the individual list entries.
-  /// It is used by forms such as DW_FORM_rnglistx.
-  /// FIXME: Generate the table and use the appropriate forms.
-  std::vector<uint64_t> Offsets;
   /// The table's format, either DWARF32 or DWARF64.
   dwarf::DwarfFormat Format;
   /// The offset at which the header (and hence the table) is located within
@@ -93,7 +89,6 @@ class DWARFListTableHeader {
 
   void clear() {
     HeaderData = {};
-    Offsets.clear();
   }
   uint64_t getHeaderOffset() const { return HeaderOffset; }
   uint8_t getAddrSize() const { return HeaderData.AddrSize; }
@@ -115,11 +110,17 @@
     llvm_unreachable("Invalid DWARF format (expected DWARF32 or DWARF64");
   }
 
-  void dump(raw_ostream &OS, DIDumpOptions DumpOpts = {}) const;
-  Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
-    if (Index < Offsets.size())
-      return Offsets[Index];
-    return None;
+  void dump(DataExtractor Data, raw_ostream &OS,
+            DIDumpOptions DumpOpts = {}) const;
+  Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const {
+    if (Index > HeaderData.OffsetEntryCount)
+      return None;
+
+    uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 8 : 4;
+    uint64_t Offset =
+        getHeaderOffset() + getHeaderSize(Format) + OffsetByteSize * Index;
+    auto R = Data.getUnsigned(&Offset, OffsetByteSize);
+    return R;
   }
 
   /// Extract the table header and the array of offsets.
@@ -169,14 +170,14 @@ template <typename DWARFListType> class DWARFListTableBase {
   uint8_t getAddrSize() const { return Header.getAddrSize(); }
   dwarf::DwarfFormat getFormat() const { return Header.getFormat(); }
 
-  void dump(raw_ostream &OS,
+  void dump(DWARFDataExtractor Data, raw_ostream &OS,
             llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
                 LookupPooledAddress,
             DIDumpOptions DumpOpts = {}) const;
 
   /// Return the contents of the offset entry designated by a given index.
-  Optional<uint64_t> getOffsetEntry(uint32_t Index) const {
-    return Header.getOffsetEntry(Index);
+  Optional<uint64_t> getOffsetEntry(DataExtractor Data, uint32_t Index) const {
+    return Header.getOffsetEntry(Data, Index);
   }
 
   /// Return the size of the table header including the length but not including
   /// the offsets. This is dependent on the table format, which is unambiguously
@@ -240,11 +241,11 @@ Error DWARFListType<ListEntryType>::extract(DWARFDataExtractor Data,
 
 template <typename DWARFListType>
 void DWARFListTableBase<DWARFListType>::dump(
-    raw_ostream &OS,
+    DWARFDataExtractor Data, raw_ostream &OS,
     llvm::function_ref<Optional<object::SectionedAddress>(uint32_t)>
         LookupPooledAddress,
     DIDumpOptions DumpOpts) const {
-  Header.dump(OS, DumpOpts);
+  Header.dump(Data, OS, DumpOpts);
   OS << HeaderString << "\n";
 
   // Determine the length of the longest encoding string we have in the table,
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 1613e74d4a367e..c76ee5efa37b26 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -412,18 +412,13 @@ class DWARFUnit {
   /// Return a rangelist's offset based on an index. The index designates
   /// an entry in the rangelist table's offset array and is supplied by
   /// DW_FORM_rnglistx.
-  Optional<uint64_t> getRnglistOffset(uint32_t Index) {
-    if (!RngListTable)
-      return None;
-    if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(Index))
-      return *Off + RangeSectionBase;
-    return None;
-  }
+  Optional<uint64_t> getRnglistOffset(uint32_t Index);
 
   Optional<uint64_t> getLoclistOffset(uint32_t Index) {
     if (!LoclistTableHeader)
       return None;
-    if (Optional<uint64_t> Off = LoclistTableHeader->getOffsetEntry(Index))
+    if (Optional<uint64_t> Off =
+            LoclistTableHeader->getOffsetEntry(LocTable->getData(), Index))
       return *Off + getLocSectionBase();
     return None;
   }
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 3bcde8fafb1f75..96ba5794683344 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -255,7 +255,7 @@ static void dumpRnglistsSection(
         break;
       Offset = TableOffset + Length;
     } else {
-      Rnglists.dump(OS, LookupPooledAddress, DumpOpts);
+      Rnglists.dump(rnglistData, OS, LookupPooledAddress, DumpOpts);
     }
   }
 }
@@ -316,7 +316,7 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts,
       return;
     }
 
-    Header.dump(OS, DumpOpts);
+    Header.dump(Data, OS, DumpOpts);
 
     uint64_t EndOffset = Header.length() + Header.getHeaderOffset();
     Data.setAddressSize(Header.getAddrSize());
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index 2124a49bef6065..c876af1e9b5135 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -71,12 +71,12 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
                              ") than there is space for",
                              SectionName.data(), HeaderOffset,
                              HeaderData.OffsetEntryCount);
   Data.setAddressSize(HeaderData.AddrSize);
-  for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I)
-    Offsets.push_back(Data.getRelocatedValue(OffsetByteSize, OffsetPtr));
+  *OffsetPtr += HeaderData.OffsetEntryCount * OffsetByteSize;
   return Error::success();
 }
 
-void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
+void DWARFListTableHeader::dump(DataExtractor Data, raw_ostream &OS,
+                                DIDumpOptions DumpOpts) const {
   if (DumpOpts.Verbose)
     OS << format("0x%8.8" PRIx64 ": ", HeaderOffset);
   int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format);
@@ -91,7 +91,8 @@ void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
   if (HeaderData.OffsetEntryCount > 0) {
     OS << "offsets: [";
-    for (const auto &Off : Offsets) {
+    for (uint32_t I = 0; I < HeaderData.OffsetEntryCount; ++I) {
+      auto Off = *getOffsetEntry(Data, I);
       OS << format("\n0x%0*" PRIx64, OffsetDumpWidth, Off);
       if (DumpOpts.Verbose)
         OS << format(" => 0x%08" PRIx64,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 0527f29d1a1a9d..b871e6ebdca56c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -548,17 +548,13 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
       uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Header.getFormat());
       uint64_t Offset = getLocSectionBase();
-      DWARFDataExtractor Data(Context.getDWARFObj(), *LocSection,
-                              isLittleEndian, getAddressByteSize());
+      const DWARFDataExtractor &Data = LocTable->getData();
       if (Offset < HeaderSize)
         return createStringError(errc::invalid_argument,
                                  "did not detect a valid"
                                  " list table with base = 0x%" PRIx64 "\n",
                                  Offset);
       Offset -= HeaderSize;
-      if (auto *IndexEntry = Header.getIndexEntry())
-        if (const auto *Contrib = IndexEntry->getContribution(DW_SECT_LOCLISTS))
-          Offset += Contrib->Offset;
       if (Error E = LoclistTableHeader->extract(Data, &Offset))
         return createStringError(errc::invalid_argument,
                                  "parsing a loclist table: " +
                                      toString(std::move(E)));
@@ -1009,3 +1005,13 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) {
     return DescOrError.takeError();
   return *DescOrError;
 }
+
+Optional<uint64_t> DWARFUnit::getRnglistOffset(uint32_t Index) {
+  if (!RngListTable)
+    return None;
+  DataExtractor RangesData(RangeSection->Data, isLittleEndian,
+                           getAddressByteSize());
+  if (Optional<uint64_t> Off = RngListTable->getOffsetEntry(RangesData, Index))
+    return *Off + RangeSectionBase;
+  return None;
+}

From 5a15f6628efcb583e1cca1fdc57d7e64f5f665da Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 27 Jul 2020 22:00:50 -0400
Subject: [PATCH 071/101] GlobalISel: Implement fewerElementsVector for G_INSERT_VECTOR_ELT

Add unit tests since AMDGPU will only trigger this for gigantic
vectors, and won't use the annoying odd sized breakdown case.
---
 .../llvm/CodeGen/GlobalISel/LegalizerHelper.h |    6 +-
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   45 +-
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |    2 +-
 .../AMDGPU/GlobalISel/insertelement.large.ll  |  137 ++
 .../GlobalISel/legalize-insert-vector-elt.mir | 1227 ++---------------
 .../GlobalISel/LegalizerHelperTest.cpp        |   81 ++
 6 files changed, 392 insertions(+), 1106 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index e1295f66c29847..d0e7419ec8129d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -279,9 +279,9 @@ class LegalizerHelper {
   LegalizeResult fewerElementsVectorBuildVector(MachineInstr &MI,
                                                 unsigned TypeIdx,
                                                 LLT NarrowTy);
 
-  LegalizeResult fewerElementsVectorExtractVectorElt(MachineInstr &MI,
-                                                     unsigned TypeIdx,
-                                                     LLT NarrowTy);
+  LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+                                                           unsigned TypeIdx,
+                                                           LLT NarrowTy);
 
   LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 841c37ec7a3ea0..2f722d04a69732 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3608,18 +3608,24 @@ LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
 }
 
 LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI,
-                                                     unsigned TypeIdx,
-                                                     LLT NarrowVecTy) {
-  assert(TypeIdx == 1 && "not a vector type index");
+LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
+                                                           unsigned TypeIdx,
+                                                           LLT NarrowVecTy) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcVec = MI.getOperand(1).getReg();
+  Register InsertVal;
+  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
+
+  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
+  if (IsInsert)
+    InsertVal = MI.getOperand(2).getReg();
+
+  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
 
   // TODO: Handle total scalarization case.
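  // Illustrative note (not part of the original patch): for a constant
  // index, the code below routes the operation to a single NarrowTy piece.
  // The piece is selected as IdxVal / NewNumElts and the lane within it is
  // IdxVal - NewNumElts * PartIdx; e.g. with NewNumElts = 16, lane 37 of a
  // 64-element vector becomes lane 5 of piece 2, and for an insert the
  // pieces are then remerged to form the full-width result.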
if (!NarrowVecTy.isVector()) return UnableToLegalize; - Register DstReg = MI.getOperand(0).getReg(); - Register SrcVec = MI.getOperand(1).getReg(); - Register Idx = MI.getOperand(2).getReg(); LLT VecTy = MRI.getType(SrcVec); // If the index is a constant, we can really break this down as you would @@ -3637,8 +3643,8 @@ LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI, LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec); // Build a sequence of NarrowTy pieces in VecParts for this operand. - buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, - TargetOpcode::G_ANYEXT); + LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts, + TargetOpcode::G_ANYEXT); unsigned NewNumElts = NarrowVecTy.getNumElements(); @@ -3647,12 +3653,26 @@ LegalizerHelper::fewerElementsVectorExtractVectorElt(MachineInstr &MI, auto NewIdx = MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx); - MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + if (IsInsert) { + LLT PartTy = MRI.getType(VecParts[PartIdx]); + + // Use the adjusted index to insert into one of the subvectors. + auto InsertPart = MIRBuilder.buildInsertVectorElement( + PartTy, VecParts[PartIdx], InsertVal, NewIdx); + VecParts[PartIdx] = InsertPart.getReg(0); + + // Recombine the inserted subvector with the others to reform the result + // vector. + buildWidenedRemergeToDst(DstReg, LCMTy, VecParts); + } else { + MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx); + } + MI.eraseFromParent(); return Legalized; } - // With a variable index, we can't perform the extract in a smaller type, so + // With a variable index, we can't perform the operation in a smaller type, so // we're forced to expand this. // // TODO: We could emit a chain of compare/select to figure out which piece to @@ -3992,7 +4012,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_BUILD_VECTOR: return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_EXTRACT_VECTOR_ELT: - return fewerElementsVectorExtractVectorElt(MI, TypeIdx, NarrowTy); + case G_INSERT_VECTOR_ELT: + return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 28fbc3ec59e710..4c3adb108031a5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1359,7 +1359,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .clampScalar(EltTypeIdx, S32, S64) .clampScalar(VecTypeIdx, S32, S64) .clampScalar(IdxTypeIdx, S32, S32) - .clampMaxNumElements(1, S32, 32) + .clampMaxNumElements(VecTypeIdx, S32, 32) // TODO: Clamp elements for 64-bit vectors? // It should only be necessary with variable indexes. 
// As a last resort, lower to the stack diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll new file mode 100644 index 00000000000000..5d1468eba04ea7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -0,0 +1,137 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, <64 x i32> addrspace(1)* %ptr.out) #0 { +; GCN-LABEL: v_insert_v64i32_37: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GCN-NEXT: v_lshlrev_b64 v[0:1], 8, v[0:1] +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v8, vcc, v2, v0 +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v1, vcc +; GCN-NEXT: s_movk_i32 s0, 0x80 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v12, vcc, v8, v2 +; GCN-NEXT: s_movk_i32 s0, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v65, s1 +; GCN-NEXT: v_mov_b32_e32 v5, s3 +; GCN-NEXT: v_mov_b32_e32 v64, s0 +; GCN-NEXT: s_movk_i32 s0, 0x50 +; GCN-NEXT: v_mov_b32_e32 v69, s1 +; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, v9, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v4, s2 +; GCN-NEXT: v_add_co_u32_e32 v66, vcc, v4, v0 +; GCN-NEXT: v_mov_b32_e32 v68, s0 +; GCN-NEXT: s_movk_i32 s0, 0x60 +; GCN-NEXT: v_mov_b32_e32 v71, s1 +; GCN-NEXT: v_addc_co_u32_e32 v67, vcc, v5, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v70, s0 +; GCN-NEXT: s_movk_i32 s0, 0x70 +; GCN-NEXT: v_mov_b32_e32 v73, s1 +; GCN-NEXT: v_add_co_u32_e32 v74, vcc, v66, v2 +; GCN-NEXT: v_mov_b32_e32 v72, s0 +; GCN-NEXT: s_movk_i32 s0, 0x90 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_addc_co_u32_e32 v75, vcc, v67, v3, vcc +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v76, vcc, v66, v0 +; GCN-NEXT: v_addc_co_u32_e32 v77, vcc, v67, v1, vcc +; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[0:3], v[12:13], off +; GCN-NEXT: v_add_co_u32_e32 v10, vcc, 64, v8 +; GCN-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v9, vcc +; GCN-NEXT: v_add_co_u32_e32 v28, vcc, v8, v64 +; GCN-NEXT: v_addc_co_u32_e32 v29, vcc, v9, v65, vcc +; GCN-NEXT: global_load_dwordx4 v[32:35], v[8:9], off +; GCN-NEXT: global_load_dwordx4 v[36:39], v[8:9], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[40:43], v[8:9], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[44:47], v[8:9], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[10:11], off +; GCN-NEXT: global_load_dwordx4 v[52:55], v[10:11], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[10:11], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[60:63], v[10:11], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 +; GCN-NEXT: global_load_dwordx4 v[16:19], v[28:29], off +; GCN-NEXT: global_load_dwordx4 v[20:23], v[28:29], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[24:27], v[28:29], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[28:31], v[28:29], off offset:48 +; GCN-NEXT: s_movk_i32 s0, 0xa0 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7 +; GCN-NEXT: s_waitcnt vmcnt(14) +; GCN-NEXT: global_store_dwordx4 
v[74:75], v[0:3], off +; GCN-NEXT: global_store_dwordx4 v[76:77], v[4:7], off +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v0 +; GCN-NEXT: s_movk_i32 s0, 0xb0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v1, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[8:11], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[12:15], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v64 +; GCN-NEXT: s_movk_i32 s0, 0xd0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v65, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: s_movk_i32 s0, 0xe0 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[16:19], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[20:23], off +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v0 +; GCN-NEXT: s_movk_i32 s0, 0xf0 +; GCN-NEXT: v_mov_b32_e32 v3, s1 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v1, vcc +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v2 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v3, vcc +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[0:1], v[24:27], off +; GCN-NEXT: s_waitcnt vmcnt(7) +; GCN-NEXT: global_store_dwordx4 v[2:3], v[28:31], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, 64, v66 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v67, vcc +; GCN-NEXT: global_store_dwordx4 v[0:1], v[36:39], off offset:-48 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[40:43], off offset:-32 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[44:47], off offset:-16 +; GCN-NEXT: global_store_dwordx4 v[0:1], v[48:51], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v68 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v69, vcc +; GCN-NEXT: global_store_dwordx4 v[66:67], v[32:35], off +; GCN-NEXT: global_store_dwordx4 v[0:1], v[52:55], off +; GCN-NEXT: v_add_co_u32_e32 v0, vcc, v66, v70 +; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v67, v71, vcc +; GCN-NEXT: v_add_co_u32_e32 v2, vcc, v66, v72 +; GCN-NEXT: v_addc_co_u32_e32 v3, vcc, v67, v73, vcc +; GCN-NEXT: global_store_dwordx4 v[0:1], v[56:59], off +; GCN-NEXT: global_store_dwordx4 v[2:3], v[60:63], off +; GCN-NEXT: s_endpgm + %id = call i32 @llvm.amdgcn.workitem.id.x() + %gep.in = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.in, i32 %id + %vec = load <64 x i32>, <64 x i32> addrspace(1)* %gep.in + %insert = insertelement <64 x i32> %vec, i32 999, i32 37 + %gep.out = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.out, i32 %id + store <64 x i32> %insert, <64 x i32> addrspace(1)* %gep.out + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { "amdgpu-waves-per-eu"="1,10" } +attributes #1 = { nounwind readnone speculatable willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 3f3ec6216585b2..6d0d24e2373eaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -190,712 +190,94 @@ body: | ; CHECK-LABEL: name: 
insert_vector_elt_64_65_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load 64, align 4, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 64 + 64, align 4, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 64 + 128, align 4, addrspace 4) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.1, align 256, addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5) - ; CHECK: G_STORE [[UV1]](s32), [[COPY1]](p5) :: (store 4 into %stack.1 + 4, align 256, addrspace 5) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5) - ; CHECK: G_STORE [[UV2]](s32), [[COPY2]](p5) :: (store 4 into %stack.1 + 8, align 256, addrspace 5) - ; CHECK: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5) - ; CHECK: G_STORE [[UV3]](s32), [[COPY3]](p5) :: (store 4 into %stack.1 + 12, align 256, addrspace 5) - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5) - ; CHECK: G_STORE [[UV4]](s32), [[COPY4]](p5) :: (store 4 into %stack.1 + 16, align 256, addrspace 5) - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5) - ; CHECK: G_STORE [[UV5]](s32), [[COPY5]](p5) :: (store 4 into %stack.1 + 20, align 256, addrspace 5) - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5) - ; CHECK: G_STORE [[UV6]](s32), [[COPY6]](p5) :: (store 4 into %stack.1 + 24, align 256, addrspace 5) - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5) - ; CHECK: G_STORE [[UV7]](s32), [[COPY7]](p5) :: (store 4 into %stack.1 + 28, align 256, addrspace 5) - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5) - ; CHECK: G_STORE [[UV8]](s32), [[COPY8]](p5) :: (store 4 into %stack.1 + 32, align 256, addrspace 5) - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 36 - ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5) - ; CHECK: G_STORE [[UV9]](s32), [[COPY9]](p5) :: (store 4 into %stack.1 + 36, align 256, addrspace 5) - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5) - ; CHECK: G_STORE [[UV10]](s32), [[COPY10]](p5) :: (store 4 into %stack.1 + 40, align 256, addrspace 5) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5) - ; CHECK: G_STORE [[UV11]](s32), [[COPY11]](p5) :: (store 4 into %stack.1 + 44, align 256, addrspace 5) - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5) - ; CHECK: G_STORE [[UV12]](s32), [[COPY12]](p5) :: (store 4 into %stack.1 + 48, align 256, addrspace 5) - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5) - ; CHECK: G_STORE [[UV13]](s32), [[COPY13]](p5) :: (store 4 into %stack.1 + 52, align 256, addrspace 5) - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5) - ; CHECK: G_STORE [[UV14]](s32), [[COPY14]](p5) :: (store 4 into %stack.1 + 56, align 256, 
addrspace 5) - ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5) - ; CHECK: G_STORE [[UV15]](s32), [[COPY15]](p5) :: (store 4 into %stack.1 + 60, align 256, addrspace 5) - ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5) - ; CHECK: G_STORE [[UV16]](s32), [[COPY16]](p5) :: (store 4 into %stack.1 + 64, align 256, addrspace 5) - ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5) - ; CHECK: G_STORE [[UV17]](s32), [[COPY17]](p5) :: (store 4 into %stack.1 + 68, align 256, addrspace 5) - ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5) - ; CHECK: G_STORE [[UV18]](s32), [[COPY18]](p5) :: (store 4 into %stack.1 + 72, align 256, addrspace 5) - ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5) - ; CHECK: G_STORE [[UV19]](s32), [[COPY19]](p5) :: (store 4 into %stack.1 + 76, align 256, addrspace 5) - ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5) - ; CHECK: G_STORE [[UV20]](s32), [[COPY20]](p5) :: (store 4 into %stack.1 + 80, align 256, addrspace 5) - ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5) - ; CHECK: G_STORE [[UV21]](s32), [[COPY21]](p5) :: (store 4 into %stack.1 + 84, align 256, addrspace 5) - ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5) - ; CHECK: G_STORE [[UV22]](s32), [[COPY22]](p5) :: (store 4 into %stack.1 + 88, align 256, addrspace 5) - ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5) - ; CHECK: G_STORE [[UV23]](s32), [[COPY23]](p5) :: (store 4 into %stack.1 + 92, align 256, addrspace 5) - ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5) - ; CHECK: G_STORE [[UV24]](s32), [[COPY24]](p5) :: (store 4 into %stack.1 + 96, align 256, addrspace 5) - ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5) - ; CHECK: G_STORE [[UV25]](s32), [[COPY25]](p5) :: (store 4 into %stack.1 + 100, align 256, addrspace 5) - ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5) - ; CHECK: G_STORE [[UV26]](s32), [[COPY26]](p5) :: (store 4 into %stack.1 + 104, align 
256, addrspace 5) - ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5) - ; CHECK: G_STORE [[UV27]](s32), [[COPY27]](p5) :: (store 4 into %stack.1 + 108, align 256, addrspace 5) - ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5) - ; CHECK: G_STORE [[UV28]](s32), [[COPY28]](p5) :: (store 4 into %stack.1 + 112, align 256, addrspace 5) - ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5) - ; CHECK: G_STORE [[UV29]](s32), [[COPY29]](p5) :: (store 4 into %stack.1 + 116, align 256, addrspace 5) - ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5) - ; CHECK: G_STORE [[UV30]](s32), [[COPY30]](p5) :: (store 4 into %stack.1 + 120, align 256, addrspace 5) - ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5) - ; CHECK: G_STORE [[UV31]](s32), [[COPY31]](p5) :: (store 4 into %stack.1 + 124, align 256, addrspace 5) - ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5) - ; CHECK: G_STORE [[UV32]](s32), [[COPY32]](p5) :: (store 4 into %stack.1 + 128, align 256, addrspace 5) - ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: G_STORE [[UV33]](s32), [[COPY33]](p5) :: (store 4 into %stack.1 + 132, align 256, addrspace 5) - ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5) - ; CHECK: G_STORE [[UV34]](s32), [[COPY34]](p5) :: (store 4 into %stack.1 + 136, align 256, addrspace 5) - ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5) - ; CHECK: G_STORE [[UV35]](s32), [[COPY35]](p5) :: (store 4 into %stack.1 + 140, align 256, addrspace 5) - ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5) - ; CHECK: G_STORE [[UV36]](s32), [[COPY36]](p5) :: (store 4 into %stack.1 + 144, align 256, addrspace 5) - ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5) - ; CHECK: G_STORE [[UV37]](s32), [[COPY37]](p5) :: (store 4 into %stack.1 + 148, align 256, addrspace 5) - ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5) - ; 
CHECK: G_STORE [[UV38]](s32), [[COPY38]](p5) :: (store 4 into %stack.1 + 152, align 256, addrspace 5) - ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5) - ; CHECK: G_STORE [[UV39]](s32), [[COPY39]](p5) :: (store 4 into %stack.1 + 156, align 256, addrspace 5) - ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5) - ; CHECK: G_STORE [[UV40]](s32), [[COPY40]](p5) :: (store 4 into %stack.1 + 160, align 256, addrspace 5) - ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5) - ; CHECK: G_STORE [[UV41]](s32), [[COPY41]](p5) :: (store 4 into %stack.1 + 164, align 256, addrspace 5) - ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5) - ; CHECK: G_STORE [[UV42]](s32), [[COPY42]](p5) :: (store 4 into %stack.1 + 168, align 256, addrspace 5) - ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5) - ; CHECK: G_STORE [[UV43]](s32), [[COPY43]](p5) :: (store 4 into %stack.1 + 172, align 256, addrspace 5) - ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5) - ; CHECK: G_STORE [[UV44]](s32), [[COPY44]](p5) :: (store 4 into %stack.1 + 176, align 256, addrspace 5) - ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5) - ; CHECK: G_STORE [[UV45]](s32), [[COPY45]](p5) :: (store 4 into %stack.1 + 180, align 256, addrspace 5) - ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5) - ; CHECK: G_STORE [[UV46]](s32), [[COPY46]](p5) :: (store 4 into %stack.1 + 184, align 256, addrspace 5) - ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32) - ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5) - ; CHECK: G_STORE [[UV47]](s32), [[COPY47]](p5) :: (store 4 into %stack.1 + 188, align 256, addrspace 5) - ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5) - ; CHECK: G_STORE [[UV48]](s32), [[COPY48]](p5) :: (store 4 into %stack.1 + 192, align 256, addrspace 5) - ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5) - ; CHECK: G_STORE [[UV49]](s32), [[COPY49]](p5) :: (store 4 into %stack.1 + 196, align 256, addrspace 5) - ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C53]](s32) - ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5) - ; CHECK: G_STORE [[UV50]](s32), [[COPY50]](p5) :: (store 4 into %stack.1 + 200, align 256, addrspace 5) - ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5) - ; CHECK: G_STORE [[UV51]](s32), [[COPY51]](p5) :: (store 4 into %stack.1 + 204, align 256, addrspace 5) - ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5) - ; CHECK: G_STORE [[UV52]](s32), [[COPY52]](p5) :: (store 4 into %stack.1 + 208, align 256, addrspace 5) - ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5) - ; CHECK: G_STORE [[UV53]](s32), [[COPY53]](p5) :: (store 4 into %stack.1 + 212, align 256, addrspace 5) - ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5) - ; CHECK: G_STORE [[UV54]](s32), [[COPY54]](p5) :: (store 4 into %stack.1 + 216, align 256, addrspace 5) - ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5) - ; CHECK: G_STORE [[UV55]](s32), [[COPY55]](p5) :: (store 4 into %stack.1 + 220, align 256, addrspace 5) - ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5) - ; CHECK: G_STORE [[UV56]](s32), [[COPY56]](p5) :: (store 4 into %stack.1 + 224, align 256, addrspace 5) - ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5) - ; CHECK: G_STORE [[UV57]](s32), [[COPY57]](p5) :: (store 4 into %stack.1 + 228, align 256, addrspace 5) - ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5) - ; CHECK: G_STORE [[UV58]](s32), [[COPY58]](p5) :: (store 4 into %stack.1 + 232, align 256, addrspace 5) - ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5) - ; CHECK: G_STORE [[UV59]](s32), [[COPY59]](p5) :: (store 4 into %stack.1 + 236, align 256, addrspace 5) - ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) - ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5) - ; CHECK: G_STORE [[UV60]](s32), [[COPY60]](p5) :: (store 4 into %stack.1 + 240, align 256, addrspace 5) - ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5) - ; CHECK: G_STORE [[UV61]](s32), [[COPY61]](p5) :: (store 4 into %stack.1 + 244, align 256, addrspace 5) - ; CHECK: 
[[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5) - ; CHECK: G_STORE [[UV62]](s32), [[COPY62]](p5) :: (store 4 into %stack.1 + 248, align 256, addrspace 5) - ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32) - ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5) - ; CHECK: G_STORE [[UV63]](s32), [[COPY63]](p5) :: (store 4 into %stack.1 + 252, align 256, addrspace 5) - ; CHECK: [[C67:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C67]](s32) - ; CHECK: G_STORE [[C4]](s32), [[PTR_ADD66]](p5) :: (store 4 into %stack.1 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4 from %stack.1 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from %stack.1 + 260, align 256, addrspace 5) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from %stack.1 + 264, align 256, addrspace 5) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from %stack.1 + 268, align 256, addrspace 5) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from %stack.1 + 272, align 256, addrspace 5) - ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 4 from %stack.1 + 276, align 256, addrspace 5) - ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 4 from %stack.1 + 280, align 256, addrspace 5) - ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.1 + 284, align 256, addrspace 5) - ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 4 from %stack.1 + 288, align 256, addrspace 5) - ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 4 from %stack.1 + 292, align 256, addrspace 5) - ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 4 from %stack.1 + 296, align 256, addrspace 5) - ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 4 from %stack.1 + 300, align 256, addrspace 5) - ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 4 from %stack.1 + 304, align 256, addrspace 5) - ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 4 from %stack.1 + 308, align 256, addrspace 5) - ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 4 from %stack.1 + 312, align 256, addrspace 5) - ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 4 from %stack.1 + 316, align 256, addrspace 5) - ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 4 from %stack.1 + 320, align 256, addrspace 5) - ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 4 from %stack.1 + 324, align 256, addrspace 5) - ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 4 from %stack.1 + 328, align 256, addrspace 5) - ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 4 from %stack.1 + 332, align 256, addrspace 5) - ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 4 from %stack.1 + 336, align 256, addrspace 5) - ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load 4 from %stack.1 + 340, align 256, addrspace 5) - ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load 4 from 
%stack.1 + 344, align 256, addrspace 5) - ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load 4 from %stack.1 + 348, align 256, addrspace 5) - ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load 4 from %stack.1 + 352, align 256, addrspace 5) - ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load 4 from %stack.1 + 356, align 256, addrspace 5) - ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load 4 from %stack.1 + 360, align 256, addrspace 5) - ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load 4 from %stack.1 + 364, align 256, addrspace 5) - ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load 4 from %stack.1 + 368, align 256, addrspace 5) - ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load 4 from %stack.1 + 372, align 256, addrspace 5) - ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load 4 from %stack.1 + 376, align 256, addrspace 5) - ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load 4 from %stack.1 + 380, align 256, addrspace 5) - ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load 4 from %stack.1 + 384, align 256, addrspace 5) - ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD35]](p5) :: (load 4 from %stack.1 + 388, align 256, addrspace 5) - ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load 4 from %stack.1 + 392, align 256, addrspace 5) - ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load 4 from %stack.1 + 396, align 256, addrspace 5) - ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load 4 from %stack.1 + 400, align 256, addrspace 5) - ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load 4 from %stack.1 + 404, align 256, addrspace 5) - ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load 4 from %stack.1 + 408, align 256, addrspace 5) - ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load 4 from %stack.1 + 412, align 256, addrspace 5) - ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load 4 from %stack.1 + 416, align 256, addrspace 5) - ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load 4 from %stack.1 + 420, align 256, addrspace 5) - ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load 4 from %stack.1 + 424, align 256, addrspace 5) - ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load 4 from %stack.1 + 428, align 256, addrspace 5) - ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load 4 from %stack.1 + 432, align 256, addrspace 5) - ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load 4 from %stack.1 + 436, align 256, addrspace 5) - ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load 4 from %stack.1 + 440, align 256, addrspace 5) - ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load 4 from %stack.1 + 444, align 256, addrspace 5) - ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load 4 from %stack.1 + 448, align 256, addrspace 5) - ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load 4 from %stack.1 + 452, align 256, addrspace 5) - ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load 4 from %stack.1 + 456, align 256, addrspace 5) - ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load 4 from %stack.1 + 460, align 256, addrspace 5) - ; CHECK: 
[[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load 4 from %stack.1 + 464, align 256, addrspace 5) - ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load 4 from %stack.1 + 468, align 256, addrspace 5) - ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load 4 from %stack.1 + 472, align 256, addrspace 5) - ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load 4 from %stack.1 + 476, align 256, addrspace 5) - ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load 4 from %stack.1 + 480, align 256, addrspace 5) - ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load 4 from %stack.1 + 484, align 256, addrspace 5) - ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load 4 from %stack.1 + 488, align 256, addrspace 5) - ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load 4 from %stack.1 + 492, align 256, addrspace 5) - ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load 4 from %stack.1 + 496, align 256, addrspace 5) - ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load 4 from %stack.1 + 500, align 256, addrspace 5) - ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load 4 from %stack.1 + 504, align 256, addrspace 5) - ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load 4 from %stack.1 + 508, align 256, addrspace 5) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) - ; CHECK: G_STORE [[UV64]](s32), [[FRAME_INDEX1]](p5) :: (store 4 into %stack.0, align 256, addrspace 5) - ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C5]](s32) - ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = 
COPY [[PTR_ADD67]](p5)
-    ; CHECK: G_STORE [[UV65]](s32), [[COPY64]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C6]](s32)
-    ; CHECK: [[COPY65:%[0-9]+]]:_(p5) = COPY [[PTR_ADD68]](p5)
-    ; CHECK: G_STORE [[UV66]](s32), [[COPY65]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD69:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C7]](s32)
-    ; CHECK: [[COPY66:%[0-9]+]]:_(p5) = COPY [[PTR_ADD69]](p5)
-    ; CHECK: G_STORE [[UV67]](s32), [[COPY66]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C8]](s32)
-    ; CHECK: [[COPY67:%[0-9]+]]:_(p5) = COPY [[PTR_ADD70]](p5)
-    ; CHECK: G_STORE [[UV68]](s32), [[COPY67]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C9]](s32)
-    ; CHECK: [[COPY68:%[0-9]+]]:_(p5) = COPY [[PTR_ADD71]](p5)
-    ; CHECK: G_STORE [[UV69]](s32), [[COPY68]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C10]](s32)
-    ; CHECK: [[COPY69:%[0-9]+]]:_(p5) = COPY [[PTR_ADD72]](p5)
-    ; CHECK: G_STORE [[UV70]](s32), [[COPY69]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD73:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C11]](s32)
-    ; CHECK: [[COPY70:%[0-9]+]]:_(p5) = COPY [[PTR_ADD73]](p5)
-    ; CHECK: G_STORE [[UV71]](s32), [[COPY70]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C12]](s32)
-    ; CHECK: [[COPY71:%[0-9]+]]:_(p5) = COPY [[PTR_ADD74]](p5)
-    ; CHECK: G_STORE [[UV72]](s32), [[COPY71]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C13]](s32)
-    ; CHECK: [[COPY72:%[0-9]+]]:_(p5) = COPY [[PTR_ADD75]](p5)
-    ; CHECK: G_STORE [[UV73]](s32), [[COPY72]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C14]](s32)
-    ; CHECK: [[COPY73:%[0-9]+]]:_(p5) = COPY [[PTR_ADD76]](p5)
-    ; CHECK: G_STORE [[UV74]](s32), [[COPY73]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C15]](s32)
-    ; CHECK: [[COPY74:%[0-9]+]]:_(p5) = COPY [[PTR_ADD77]](p5)
-    ; CHECK: G_STORE [[UV75]](s32), [[COPY74]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C16]](s32)
-    ; CHECK: [[COPY75:%[0-9]+]]:_(p5) = COPY [[PTR_ADD78]](p5)
-    ; CHECK: G_STORE [[UV76]](s32), [[COPY75]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C17]](s32)
-    ; CHECK: [[COPY76:%[0-9]+]]:_(p5) = COPY [[PTR_ADD79]](p5)
-    ; CHECK: G_STORE [[UV77]](s32), [[COPY76]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C18]](s32)
-    ; CHECK: [[COPY77:%[0-9]+]]:_(p5) = COPY [[PTR_ADD80]](p5)
-    ; CHECK: G_STORE [[UV78]](s32), [[COPY77]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD81:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C19]](s32)
-    ; CHECK: [[COPY78:%[0-9]+]]:_(p5) = COPY [[PTR_ADD81]](p5)
-    ; CHECK: G_STORE [[UV79]](s32), [[COPY78]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD82:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C]](s32)
-    ; CHECK: [[COPY79:%[0-9]+]]:_(p5) = COPY [[PTR_ADD82]](p5)
-    ; CHECK: G_STORE [[UV80]](s32), [[COPY79]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD83:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C20]](s32)
-    ; CHECK: [[COPY80:%[0-9]+]]:_(p5) = COPY [[PTR_ADD83]](p5)
-    ; CHECK: G_STORE [[UV81]](s32), [[COPY80]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD84:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C21]](s32)
-    ; CHECK: [[COPY81:%[0-9]+]]:_(p5) = COPY [[PTR_ADD84]](p5)
-    ; CHECK: G_STORE [[UV82]](s32), [[COPY81]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD85:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C22]](s32)
-    ; CHECK: [[COPY82:%[0-9]+]]:_(p5) = COPY [[PTR_ADD85]](p5)
-    ; CHECK: G_STORE [[UV83]](s32), [[COPY82]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD86:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C23]](s32)
-    ; CHECK: [[COPY83:%[0-9]+]]:_(p5) = COPY [[PTR_ADD86]](p5)
-    ; CHECK: G_STORE [[UV84]](s32), [[COPY83]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD87:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C24]](s32)
-    ; CHECK: [[COPY84:%[0-9]+]]:_(p5) = COPY [[PTR_ADD87]](p5)
-    ; CHECK: G_STORE [[UV85]](s32), [[COPY84]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD88:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C25]](s32)
-    ; CHECK: [[COPY85:%[0-9]+]]:_(p5) = COPY [[PTR_ADD88]](p5)
-    ; CHECK: G_STORE [[UV86]](s32), [[COPY85]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD89:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C26]](s32)
-    ; CHECK: [[COPY86:%[0-9]+]]:_(p5) = COPY [[PTR_ADD89]](p5)
-    ; CHECK: G_STORE [[UV87]](s32), [[COPY86]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD90:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C27]](s32)
-    ; CHECK: [[COPY87:%[0-9]+]]:_(p5) = COPY [[PTR_ADD90]](p5)
-    ; CHECK: G_STORE [[UV88]](s32), [[COPY87]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD91:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C28]](s32)
-    ; CHECK: [[COPY88:%[0-9]+]]:_(p5) = COPY [[PTR_ADD91]](p5)
-    ; CHECK: G_STORE [[UV89]](s32), [[COPY88]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD92:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C29]](s32)
-    ; CHECK: [[COPY89:%[0-9]+]]:_(p5) = COPY [[PTR_ADD92]](p5)
-    ; CHECK: G_STORE [[UV90]](s32), [[COPY89]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD93:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C30]](s32)
-    ; CHECK: [[COPY90:%[0-9]+]]:_(p5) = COPY [[PTR_ADD93]](p5)
-    ; CHECK: G_STORE [[UV91]](s32), [[COPY90]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD94:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C31]](s32)
-    ; CHECK: [[COPY91:%[0-9]+]]:_(p5) = COPY [[PTR_ADD94]](p5)
-    ; CHECK: G_STORE [[UV92]](s32), [[COPY91]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD95:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C32]](s32)
-    ; CHECK: [[COPY92:%[0-9]+]]:_(p5) = COPY [[PTR_ADD95]](p5)
-    ; CHECK: G_STORE [[UV93]](s32), [[COPY92]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD96:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C33]](s32)
-    ; CHECK: [[COPY93:%[0-9]+]]:_(p5) = COPY [[PTR_ADD96]](p5)
-    ; CHECK: G_STORE [[UV94]](s32), [[COPY93]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD97:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C34]](s32)
-    ; CHECK: [[COPY94:%[0-9]+]]:_(p5) = COPY [[PTR_ADD97]](p5)
-    ; CHECK: G_STORE [[UV95]](s32), [[COPY94]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD98:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C35]](s32)
-    ; CHECK: [[COPY95:%[0-9]+]]:_(p5) = COPY [[PTR_ADD98]](p5)
-    ; CHECK: G_STORE [[UV96]](s32), [[COPY95]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD99:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C36]](s32)
-    ; CHECK: [[COPY96:%[0-9]+]]:_(p5) = COPY [[PTR_ADD99]](p5)
-    ; CHECK: G_STORE [[UV97]](s32), [[COPY96]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD100:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C37]](s32)
-    ; CHECK: [[COPY97:%[0-9]+]]:_(p5) = COPY [[PTR_ADD100]](p5)
-    ; CHECK: G_STORE [[UV98]](s32), [[COPY97]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD101:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C38]](s32)
-    ; CHECK: [[COPY98:%[0-9]+]]:_(p5) = COPY [[PTR_ADD101]](p5)
-    ; CHECK: G_STORE [[UV99]](s32), [[COPY98]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD102:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C39]](s32)
-    ; CHECK: [[COPY99:%[0-9]+]]:_(p5) = COPY [[PTR_ADD102]](p5)
-    ; CHECK: G_STORE [[UV100]](s32), [[COPY99]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD103:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C40]](s32)
-    ; CHECK: [[COPY100:%[0-9]+]]:_(p5) = COPY [[PTR_ADD103]](p5)
-    ; CHECK: G_STORE [[UV101]](s32), [[COPY100]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD104:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C41]](s32)
-    ; CHECK: [[COPY101:%[0-9]+]]:_(p5) = COPY [[PTR_ADD104]](p5)
-    ; CHECK: G_STORE [[UV102]](s32), [[COPY101]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD105:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C42]](s32)
-    ; CHECK: [[COPY102:%[0-9]+]]:_(p5) = COPY [[PTR_ADD105]](p5)
-    ; CHECK: G_STORE [[UV103]](s32), [[COPY102]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD106:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C43]](s32)
-    ; CHECK: [[COPY103:%[0-9]+]]:_(p5) = COPY [[PTR_ADD106]](p5)
-    ; CHECK: G_STORE [[UV104]](s32), [[COPY103]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD107:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C44]](s32)
-    ; CHECK: [[COPY104:%[0-9]+]]:_(p5) = COPY [[PTR_ADD107]](p5)
-    ; CHECK: G_STORE [[UV105]](s32), [[COPY104]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD108:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C45]](s32)
-    ; CHECK: [[COPY105:%[0-9]+]]:_(p5) = COPY [[PTR_ADD108]](p5)
-    ; CHECK: G_STORE [[UV106]](s32), [[COPY105]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD109:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C46]](s32)
-    ; CHECK: [[COPY106:%[0-9]+]]:_(p5) = COPY [[PTR_ADD109]](p5)
-    ; CHECK: G_STORE [[UV107]](s32), [[COPY106]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD110:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C47]](s32)
-    ; CHECK: [[COPY107:%[0-9]+]]:_(p5) = COPY [[PTR_ADD110]](p5)
-    ; CHECK: G_STORE [[UV108]](s32), [[COPY107]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD111:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C48]](s32)
-    ; CHECK: [[COPY108:%[0-9]+]]:_(p5) = COPY [[PTR_ADD111]](p5)
-    ; CHECK: G_STORE [[UV109]](s32), [[COPY108]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD112:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C49]](s32)
-    ; CHECK: [[COPY109:%[0-9]+]]:_(p5) = COPY [[PTR_ADD112]](p5)
-    ; CHECK: G_STORE [[UV110]](s32), [[COPY109]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD113:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C50]](s32)
-    ; CHECK: [[COPY110:%[0-9]+]]:_(p5) = COPY [[PTR_ADD113]](p5)
-    ; CHECK: G_STORE [[UV111]](s32), [[COPY110]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD114:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C51]](s32)
-    ; CHECK: [[COPY111:%[0-9]+]]:_(p5) = COPY [[PTR_ADD114]](p5)
-    ; CHECK: G_STORE [[UV112]](s32), [[COPY111]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD115:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C52]](s32)
-    ; CHECK: [[COPY112:%[0-9]+]]:_(p5) = COPY [[PTR_ADD115]](p5)
-    ; CHECK: G_STORE [[UV113]](s32), [[COPY112]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD116:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C53]](s32)
-    ; CHECK: [[COPY113:%[0-9]+]]:_(p5) = COPY [[PTR_ADD116]](p5)
-    ; CHECK: G_STORE [[UV114]](s32), [[COPY113]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD117:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C54]](s32)
-    ; CHECK: [[COPY114:%[0-9]+]]:_(p5) = COPY [[PTR_ADD117]](p5)
-    ; CHECK: G_STORE [[UV115]](s32), [[COPY114]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD118:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C55]](s32)
-    ; CHECK: [[COPY115:%[0-9]+]]:_(p5) = COPY [[PTR_ADD118]](p5)
-    ; CHECK: G_STORE [[UV116]](s32), [[COPY115]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD119:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C56]](s32)
-    ; CHECK: [[COPY116:%[0-9]+]]:_(p5) = COPY [[PTR_ADD119]](p5)
-    ; CHECK: G_STORE [[UV117]](s32), [[COPY116]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD120:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C57]](s32)
-    ; CHECK: [[COPY117:%[0-9]+]]:_(p5) = COPY [[PTR_ADD120]](p5)
-    ; CHECK: G_STORE [[UV118]](s32), [[COPY117]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD121:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C58]](s32)
-    ; CHECK: [[COPY118:%[0-9]+]]:_(p5) = COPY [[PTR_ADD121]](p5)
-    ; CHECK: G_STORE [[UV119]](s32), [[COPY118]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD122:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C59]](s32)
-    ; CHECK: [[COPY119:%[0-9]+]]:_(p5) = COPY [[PTR_ADD122]](p5)
-    ; CHECK: G_STORE [[UV120]](s32), [[COPY119]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD123:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C60]](s32)
-    ; CHECK: [[COPY120:%[0-9]+]]:_(p5) = COPY [[PTR_ADD123]](p5)
-    ; CHECK: G_STORE [[UV121]](s32), [[COPY120]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD124:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C61]](s32)
-    ; CHECK: [[COPY121:%[0-9]+]]:_(p5) = COPY [[PTR_ADD124]](p5)
-    ; CHECK: G_STORE [[UV122]](s32), [[COPY121]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD125:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C62]](s32)
-    ; CHECK: [[COPY122:%[0-9]+]]:_(p5) = COPY [[PTR_ADD125]](p5)
-    ; CHECK: G_STORE [[UV123]](s32), [[COPY122]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD126:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C63]](s32)
-    ; CHECK: [[COPY123:%[0-9]+]]:_(p5) = COPY [[PTR_ADD126]](p5)
-    ; CHECK: G_STORE [[UV124]](s32), [[COPY123]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD127:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C64]](s32)
-    ; CHECK: [[COPY124:%[0-9]+]]:_(p5) = COPY [[PTR_ADD127]](p5)
-    ; CHECK: G_STORE [[UV125]](s32), [[COPY124]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD128:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C65]](s32)
-    ; CHECK: [[COPY125:%[0-9]+]]:_(p5) = COPY [[PTR_ADD128]](p5)
-    ; CHECK: G_STORE [[UV126]](s32), [[COPY125]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5)
-    ; CHECK: [[PTR_ADD129:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C66]](s32)
-    ; CHECK: [[COPY126:%[0-9]+]]:_(p5) = COPY [[PTR_ADD129]](p5)
-    ; CHECK: G_STORE [[UV127]](s32), [[COPY126]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5)
-    ; CHECK: [[C68:%[0-9]+]]:_(s32) = G_CONSTANT i32 260
-    ; CHECK: [[PTR_ADD130:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C68]](s32)
-    ; CHECK: G_STORE [[C4]](s32), [[PTR_ADD130]](p5) :: (store 4 into %stack.0 + 260, addrspace 5)
-    ; CHECK: [[LOAD68:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (load 4 from %stack.0 + 260, align 256, addrspace 5)
-    ; CHECK: [[LOAD69:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD67]](p5) :: (load 4 from %stack.0 + 264, align 256, addrspace 5)
-    ; CHECK: [[LOAD70:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD68]](p5) :: (load 4 from %stack.0 + 268, align 256, addrspace 5)
-    ; CHECK: [[LOAD71:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD69]](p5) :: (load 4 from %stack.0 + 272, align 256, addrspace 5)
-    ; CHECK: [[LOAD72:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD70]](p5) :: (load 4 from %stack.0 + 276, align 256, addrspace 5)
-    ; CHECK: [[LOAD73:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD71]](p5) :: (load 4 from %stack.0 + 280, align 256, addrspace 5)
-    ; CHECK: [[LOAD74:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD72]](p5) :: (load 4 from %stack.0 + 284, align 256, addrspace 5)
-    ; CHECK: [[LOAD75:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD73]](p5) :: (load 4 from %stack.0 + 288, align 256, addrspace 5)
-    ; CHECK: [[LOAD76:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD74]](p5) :: (load 4 from %stack.0 + 292, align 256, addrspace 5)
-    ; CHECK: [[LOAD77:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD75]](p5) :: (load 4 from %stack.0 + 296, align 256, addrspace 5)
-    ; CHECK: [[LOAD78:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD76]](p5) :: (load 4 from %stack.0 + 300, align 256, addrspace 5)
-    ; CHECK: [[LOAD79:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD77]](p5) :: (load 4 from %stack.0 + 304, align 256, addrspace 5)
-    ; CHECK: [[LOAD80:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD78]](p5) :: (load 4 from %stack.0 + 308, align 256, addrspace 5)
-    ; CHECK: [[LOAD81:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD79]](p5) :: (load 4 from %stack.0 + 312, align 256, addrspace 5)
-    ; CHECK: [[LOAD82:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD80]](p5) :: (load 4 from %stack.0 + 316, align 256, addrspace 5)
-    ; CHECK: [[LOAD83:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD81]](p5) :: (load 4 from %stack.0 + 320, align 256, addrspace 5)
-    ; CHECK: [[LOAD84:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD82]](p5) :: (load 4 from %stack.0 + 324, align 256, addrspace 5)
-    ; CHECK: [[LOAD85:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD83]](p5) :: (load 4 from %stack.0 + 328, align 256, addrspace 5)
-    ; CHECK: [[LOAD86:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD84]](p5) :: (load 4 from %stack.0 + 332, align 256, addrspace 5)
-    ; CHECK: [[LOAD87:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD85]](p5) :: (load 4 from %stack.0 + 336, align 256, addrspace 5)
-    ; CHECK: [[LOAD88:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD86]](p5) :: (load 4 from %stack.0 + 340, align 256, addrspace 5)
-    ; CHECK: [[LOAD89:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD87]](p5) :: (load 4 from %stack.0 + 344, align 256, addrspace 5)
-    ; CHECK: [[LOAD90:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD88]](p5) :: (load 4 from %stack.0 + 348, align 256, addrspace 5)
-    ; CHECK: [[LOAD91:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD89]](p5) :: (load 4 from %stack.0 + 352, align 256, addrspace 5)
-    ; CHECK: [[LOAD92:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD90]](p5) :: (load 4 from %stack.0 + 356, align 256, addrspace 5)
-    ; CHECK: [[LOAD93:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD91]](p5) :: (load 4 from %stack.0 + 360, align 256, addrspace 5)
-    ; CHECK: [[LOAD94:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD92]](p5) :: (load 4 from %stack.0 + 364, align 256, addrspace 5)
-    ; CHECK: [[LOAD95:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD93]](p5) :: (load 4 from %stack.0 + 368, align 256, addrspace 5)
-    ; CHECK: [[LOAD96:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD94]](p5) :: (load 4 from %stack.0 + 372, align 256, addrspace 5)
-    ; CHECK: [[LOAD97:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD95]](p5) :: (load 4 from %stack.0 + 376, align 256, addrspace 5)
-    ; CHECK: [[LOAD98:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD96]](p5) :: (load 4 from %stack.0 + 380, align 256, addrspace 5)
-    ; CHECK: [[LOAD99:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD97]](p5) :: (load 4 from %stack.0 + 384, align 256, addrspace 5)
-    ; CHECK: [[LOAD100:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD98]](p5) :: (load 4 from %stack.0 + 388, align 256, addrspace 5)
-    ; CHECK: [[LOAD101:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD99]](p5) :: (load 4 from %stack.0 + 392, align 256, addrspace 5)
-    ; CHECK: [[LOAD102:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD100]](p5) :: (load 4 from %stack.0 + 396, align 256, addrspace 5)
-    ; CHECK: [[LOAD103:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD101]](p5) :: (load 4 from %stack.0 + 400, align 256, addrspace 5)
-    ; CHECK: [[LOAD104:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD102]](p5) :: (load 4 from %stack.0 + 404, align 256, addrspace 5)
-    ; CHECK: [[LOAD105:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD103]](p5) :: (load 4 from %stack.0 + 408, align 256, addrspace 5)
-    ; CHECK: [[LOAD106:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD104]](p5) :: (load 4 from %stack.0 + 412, align 256, addrspace 5)
-    ; CHECK: [[LOAD107:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD105]](p5) :: (load 4 from %stack.0 + 416, align 256, addrspace 5)
-    ; CHECK: [[LOAD108:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD106]](p5) :: (load 4 from %stack.0 + 420, align 256, addrspace 5)
-    ; CHECK: [[LOAD109:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD107]](p5) :: (load 4 from %stack.0 + 424, align 256, addrspace 5)
-    ; CHECK: [[LOAD110:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD108]](p5) :: (load 4 from %stack.0 + 428, align 256, addrspace 5)
-    ; CHECK: [[LOAD111:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD109]](p5) :: (load 4 from %stack.0 + 432, align 256, addrspace 5)
-    ; CHECK: [[LOAD112:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD110]](p5) :: (load 4 from %stack.0 + 436, align 256, addrspace 5)
-    ; CHECK: [[LOAD113:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD111]](p5) :: (load 4 from %stack.0 + 440, align 256, addrspace 5)
-    ; CHECK: [[LOAD114:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD112]](p5) :: (load 4 from %stack.0 + 444, align 256, addrspace 5)
-    ; CHECK: [[LOAD115:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD113]](p5) :: (load 4 from %stack.0 + 448, align 256, addrspace 5)
-    ; CHECK: [[LOAD116:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD114]](p5) :: (load 4 from %stack.0 + 452, align 256, addrspace 5)
-    ; CHECK: [[LOAD117:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD115]](p5) :: (load 4 from %stack.0 + 456, align 256, addrspace 5)
-    ; CHECK: [[LOAD118:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD116]](p5) :: (load 4 from %stack.0 + 460, align 256, addrspace 5)
-    ; CHECK: [[LOAD119:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD117]](p5) :: (load 4 from %stack.0 + 464, align 256, addrspace 5)
-    ; CHECK: [[LOAD120:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD118]](p5) :: (load 4 from %stack.0 + 468, align 256, addrspace 5)
-    ; CHECK: [[LOAD121:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD119]](p5) :: (load 4 from %stack.0 + 472, align 256, addrspace 5)
-    ; CHECK: [[LOAD122:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD120]](p5) :: (load 4 from %stack.0 + 476, align 256, addrspace 5)
-    ; CHECK: [[LOAD123:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD121]](p5) :: (load 4 from %stack.0 + 480, align 256, addrspace 5)
-    ; CHECK: [[LOAD124:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD122]](p5) :: (load 4 from %stack.0 + 484, align 256, addrspace 5)
-    ; CHECK: [[LOAD125:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD123]](p5) :: (load 4 from %stack.0 + 488, align 256, addrspace 5)
-    ; CHECK: [[LOAD126:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD124]](p5) :: (load 4 from %stack.0 + 492, align 256, addrspace 5)
-    ; CHECK: [[LOAD127:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD125]](p5) :: (load 4 from %stack.0 + 496, align 256, addrspace 5)
-    ; CHECK: [[LOAD128:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD126]](p5) :: (load 4 from %stack.0 + 500, align 256, addrspace 5)
-    ; CHECK: [[LOAD129:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD127]](p5) :: (load 4 from %stack.0 + 504, align 256, addrspace 5)
-    ; CHECK: [[LOAD130:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD128]](p5) :: (load 4 from %stack.0 + 508, align 256, addrspace 5)
-    ; CHECK: [[LOAD131:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD129]](p5) :: (load 4 from %stack.0 + 512, align 256, addrspace 5)
-    ; CHECK: [[COPY127:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY128:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32)
-    ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32)
-    ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32)
-    ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32)
-    ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32)
-    ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32)
-    ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32)
-    ; CHECK: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32)
-    ; CHECK: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32)
-    ; CHECK: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32)
-    ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32)
-    ; CHECK: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32)
-    ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32)
-    ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32)
-    ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32)
-    ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY127]](p1) :: (store 16, align 4, addrspace 1)
-    ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CHECK: [[PTR_ADD131:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C69]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD131]](p1) :: (store 16 + 16, align 4, addrspace 1)
-    ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[PTR_ADD132:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C70]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD132]](p1) :: (store 16 + 32, align 4, addrspace 1)
-    ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[PTR_ADD133:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C71]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD133]](p1) :: (store 16 + 48, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD134:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD134]](p1) :: (store 16 + 64, align 4, addrspace 1)
-    ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
-    ; CHECK: [[PTR_ADD135:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C72]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD135]](p1) :: (store 16 + 80, align 4, addrspace 1)
-    ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
-    ; CHECK: [[PTR_ADD136:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C73]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD136]](p1) :: (store 16 + 96, align 4, addrspace 1)
-    ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
-    ; CHECK: [[PTR_ADD137:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C74]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD137]](p1) :: (store 16 + 112, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD138:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C2]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD138]](p1) :: (store 16 + 128, align 4, addrspace 1)
-    ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
-    ; CHECK: [[PTR_ADD139:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C75]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD139]](p1) :: (store 16 + 144, align 4, addrspace 1)
-    ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
-    ; CHECK: [[PTR_ADD140:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C76]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD140]](p1) :: (store 16 + 160, align 4, addrspace 1)
-    ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
-    ; CHECK: [[PTR_ADD141:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C77]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD141]](p1) :: (store 16 + 176, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD142:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C3]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD142]](p1) :: (store 16 + 192, align 4, addrspace 1)
-    ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
-    ; CHECK: [[PTR_ADD143:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C78]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD143]](p1) :: (store 16 + 208, align 4, addrspace 1)
-    ; CHECK: [[C79:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
-    ; CHECK: [[PTR_ADD144:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C79]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD144]](p1) :: (store 16 + 224, align 4, addrspace 1)
-    ; CHECK: [[C80:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
-    ; CHECK: [[PTR_ADD145:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY127]], [[C80]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD145]](p1) :: (store 16 + 240, align 4, addrspace 1)
-    ; CHECK: [[BUILD_VECTOR16:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD68]](s32), [[LOAD69]](s32), [[LOAD70]](s32), [[LOAD71]](s32)
-    ; CHECK: [[BUILD_VECTOR17:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD72]](s32), [[LOAD73]](s32), [[LOAD74]](s32), [[LOAD75]](s32)
-    ; CHECK: [[BUILD_VECTOR18:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD76]](s32), [[LOAD77]](s32), [[LOAD78]](s32), [[LOAD79]](s32)
-    ; CHECK: [[BUILD_VECTOR19:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD80]](s32), [[LOAD81]](s32), [[LOAD82]](s32), [[LOAD83]](s32)
-    ; CHECK: [[BUILD_VECTOR20:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD84]](s32), [[LOAD85]](s32), [[LOAD86]](s32), [[LOAD87]](s32)
-    ; CHECK: [[BUILD_VECTOR21:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD88]](s32), [[LOAD89]](s32), [[LOAD90]](s32), [[LOAD91]](s32)
-    ; CHECK: [[BUILD_VECTOR22:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD92]](s32), [[LOAD93]](s32), [[LOAD94]](s32), [[LOAD95]](s32)
-    ; CHECK: [[BUILD_VECTOR23:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD96]](s32), [[LOAD97]](s32), [[LOAD98]](s32), [[LOAD99]](s32)
-    ; CHECK: [[BUILD_VECTOR24:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD100]](s32), [[LOAD101]](s32), [[LOAD102]](s32), [[LOAD103]](s32)
-    ; CHECK: [[BUILD_VECTOR25:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD104]](s32), [[LOAD105]](s32), [[LOAD106]](s32), [[LOAD107]](s32)
-    ; CHECK: [[BUILD_VECTOR26:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD108]](s32), [[LOAD109]](s32), [[LOAD110]](s32), [[LOAD111]](s32)
-    ; CHECK: [[BUILD_VECTOR27:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD112]](s32), [[LOAD113]](s32), [[LOAD114]](s32), [[LOAD115]](s32)
-    ; CHECK: [[BUILD_VECTOR28:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD116]](s32), [[LOAD117]](s32), [[LOAD118]](s32), [[LOAD119]](s32)
-    ; CHECK: [[BUILD_VECTOR29:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD120]](s32), [[LOAD121]](s32), [[LOAD122]](s32), [[LOAD123]](s32)
-    ; CHECK: [[BUILD_VECTOR30:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD124]](s32), [[LOAD125]](s32), [[LOAD126]](s32), [[LOAD127]](s32)
-    ; CHECK: [[BUILD_VECTOR31:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD128]](s32), [[LOAD129]](s32), [[LOAD130]](s32), [[LOAD131]](s32)
-    ; CHECK: G_STORE [[BUILD_VECTOR16]](<4 x s32>), [[COPY128]](p1) :: (store 16, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD146:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C69]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR17]](<4 x s32>), [[PTR_ADD146]](p1) :: (store 16 + 16, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD147:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C70]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR18]](<4 x s32>), [[PTR_ADD147]](p1) :: (store 16 + 32, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD148:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C71]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR19]](<4 x s32>), [[PTR_ADD148]](p1) :: (store 16 + 48, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD149:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C1]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR20]](<4 x s32>), [[PTR_ADD149]](p1) :: (store 16 + 64, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD150:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C72]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR21]](<4 x s32>), [[PTR_ADD150]](p1) :: (store 16 + 80, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD151:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C73]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR22]](<4 x s32>), [[PTR_ADD151]](p1) :: (store 16 + 96, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD152:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C74]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR23]](<4 x s32>), [[PTR_ADD152]](p1) :: (store 16 + 112, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD153:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C2]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR24]](<4 x s32>), [[PTR_ADD153]](p1) :: (store 16 + 128, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD154:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C75]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR25]](<4 x s32>), [[PTR_ADD154]](p1) :: (store 16 + 144, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD155:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C76]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR26]](<4 x s32>), [[PTR_ADD155]](p1) :: (store 16 + 160, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD156:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C77]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR27]](<4 x s32>), [[PTR_ADD156]](p1) :: (store 16 + 176, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD157:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C3]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR28]](<4 x s32>), [[PTR_ADD157]](p1) :: (store 16 + 192, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD158:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C78]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR29]](<4 x s32>), [[PTR_ADD158]](p1) :: (store 16 + 208, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD159:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C79]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR30]](<4 x s32>), [[PTR_ADD159]](p1) :: (store 16 + 224, align 4, addrspace 1)
-    ; CHECK: [[PTR_ADD160:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY128]], [[C80]](s64)
-    ; CHECK: G_STORE [[BUILD_VECTOR31]](<4 x s32>), [[PTR_ADD160]](p1) :: (store 16 + 240, align 4, addrspace 1)
+    ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY $vgpr2_vgpr3
+    ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, align 4, addrspace 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD1]](p1) :: (store 16 + 32, align 4, addrspace 1)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD2]](p1) :: (store 16 + 48, align 4, addrspace 1)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 + 64, align 4, addrspace 1)
+    ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64)
+    ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 + 80, align 4, addrspace 1)
+    ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64)
+    ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 + 96, align 4, addrspace 1)
+    ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64)
+    ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 + 112, align 4, addrspace 1)
+    ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 128
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64)
+    ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 + 128, align 4, addrspace 1)
+    ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 144
+    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64)
+    ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 + 144, align 4, addrspace 1)
+    ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 160
+    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64)
+    ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 + 160, align 4, addrspace 1)
+    ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 176
+    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64)
+    ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 + 176, align 4, addrspace 1)
+    ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 192
+    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64)
+    ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 + 192, align 4, addrspace 1)
+    ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 208
+    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64)
+    ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 + 208, align 4, addrspace 1)
+    ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 224
+    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64)
+    ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 + 224, align 4, addrspace 1)
+    ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 240
+    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64)
+    ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 + 240, align 4, addrspace 1)
+    ; CHECK: [[UV16:%[0-9]+]]:_(<4 x s32>), [[UV17:%[0-9]+]]:_(<4 x s32>), [[UV18:%[0-9]+]]:_(<4 x s32>), [[UV19:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV20:%[0-9]+]]:_(<4 x s32>), [[UV21:%[0-9]+]]:_(<4 x s32>), [[UV22:%[0-9]+]]:_(<4 x s32>), [[UV23:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV24:%[0-9]+]]:_(<4 x s32>), [[UV25:%[0-9]+]]:_(<4 x s32>), [[UV26:%[0-9]+]]:_(<4 x s32>), [[UV27:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: [[UV28:%[0-9]+]]:_(<4 x s32>), [[UV29:%[0-9]+]]:_(<4 x s32>), [[UV30:%[0-9]+]]:_(<4 x s32>), [[UV31:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[DEF]](<16 x s32>)
+    ; CHECK: G_STORE [[UV16]](<4 x s32>), [[COPY2]](p1) :: (store 16, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C]](s64)
+    ; CHECK: G_STORE [[UV17]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 + 16, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C1]](s64)
+    ; CHECK: G_STORE [[UV18]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 + 32, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+    ; CHECK: G_STORE [[UV19]](<4 x s32>), [[PTR_ADD17]](p1) :: (store 16 + 48, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C3]](s64)
+    ; CHECK: G_STORE [[UV20]](<4 x s32>), [[PTR_ADD18]](p1) :: (store 16 + 64, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C4]](s64)
+    ; CHECK: G_STORE [[UV21]](<4 x s32>), [[PTR_ADD19]](p1) :: (store 16 + 80, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C5]](s64)
+    ; CHECK: G_STORE [[UV22]](<4 x s32>), [[PTR_ADD20]](p1) :: (store 16 + 96, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C6]](s64)
+    ; CHECK: G_STORE [[UV23]](<4 x s32>), [[PTR_ADD21]](p1) :: (store 16 + 112, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C7]](s64)
+    ; CHECK: G_STORE [[UV24]](<4 x s32>), [[PTR_ADD22]](p1) :: (store 16 + 128, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C8]](s64)
+    ; CHECK: G_STORE [[UV25]](<4 x s32>), [[PTR_ADD23]](p1) :: (store 16 + 144, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C9]](s64)
+    ; CHECK: G_STORE [[UV26]](<4 x s32>), [[PTR_ADD24]](p1) :: (store 16 + 160, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C10]](s64)
+    ; CHECK: G_STORE [[UV27]](<4 x s32>), [[PTR_ADD25]](p1) :: (store 16 + 176, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C11]](s64)
+    ; CHECK: G_STORE [[UV28]](<4 x s32>), [[PTR_ADD26]](p1) :: (store 16 + 192, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C12]](s64)
+    ; CHECK: G_STORE [[UV29]](<4 x s32>), [[PTR_ADD27]](p1) :: (store 16 + 208, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C13]](s64)
+    ; CHECK: G_STORE [[UV30]](<4 x s32>), [[PTR_ADD28]](p1) :: (store 16 + 224, align 4, addrspace 1)
+    ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY2]], [[C14]](s64)
+    ; CHECK: G_STORE [[UV31]](<4 x s32>), [[PTR_ADD29]](p1) :: (store 16 + 240, align 4, addrspace 1)
     %0:_(p1) = COPY $sgpr0_sgpr1
     %1:_(s32) = G_CONSTANT i32 64
     %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4)
@@ -929,390 +311,55 @@ body: |
     ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
    ; CHECK: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 64 + 192, align 4, addrspace 4)
    ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345
-    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
-    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>)
-    ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>)
-    ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>)
-    ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>)
-    ; CHECK: G_STORE [[UV]](s32), [[FRAME_INDEX]](p5) :: (store 4 into %stack.0, align 256, addrspace 5)
-    ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD3]](p5)
-    ; CHECK: G_STORE [[UV1]](s32), [[COPY1]](p5) :: (store 4 into %stack.0 + 4, align 256, addrspace 5)
-    ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(p5) = COPY [[PTR_ADD4]](p5)
-    ; CHECK: G_STORE [[UV2]](s32), [[COPY2]](p5) :: (store 4 into %stack.0 + 8, align 256, addrspace 5)
-    ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
-    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32)
-    ; CHECK: [[COPY3:%[0-9]+]]:_(p5) = COPY [[PTR_ADD5]](p5)
-    ; CHECK: G_STORE [[UV3]](s32), [[COPY3]](p5) :: (store 4 into %stack.0 + 12, align 256, addrspace 5)
-    ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32)
-    ; CHECK: [[COPY4:%[0-9]+]]:_(p5) = COPY [[PTR_ADD6]](p5)
-    ; CHECK: G_STORE [[UV4]](s32), [[COPY4]](p5) :: (store 4 into %stack.0 + 16, align 256, addrspace 5)
-    ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
-    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32)
-    ; CHECK: [[COPY5:%[0-9]+]]:_(p5) = COPY [[PTR_ADD7]](p5)
-    ; CHECK: G_STORE [[UV5]](s32), [[COPY5]](p5) :: (store 4 into %stack.0 + 20, align 256, addrspace 5)
-    ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
-    ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32)
-    ; CHECK: [[COPY6:%[0-9]+]]:_(p5) = COPY [[PTR_ADD8]](p5)
-    ; CHECK: G_STORE [[UV6]](s32), [[COPY6]](p5) :: (store 4 into %stack.0 + 24, align 256, addrspace 5)
-    ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
-    ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32)
-    ; CHECK: [[COPY7:%[0-9]+]]:_(p5) = COPY [[PTR_ADD9]](p5)
-    ; CHECK: G_STORE [[UV7]](s32), [[COPY7]](p5) :: (store 4 into %stack.0 + 28, align 256, addrspace 5)
-    ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
-    ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32)
-    ; CHECK: [[COPY8:%[0-9]+]]:_(p5) = COPY [[PTR_ADD10]](p5)
-    ; CHECK: G_STORE [[UV8]](s32), [[COPY8]](p5) :: (store 4 into %stack.0 + 32, align 256, addrspace 5)
-    ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 36
-    ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32)
-    ; CHECK: [[COPY9:%[0-9]+]]:_(p5) = COPY [[PTR_ADD11]](p5)
-    ; CHECK: G_STORE [[UV9]](s32), [[COPY9]](p5) :: (store 4 into %stack.0 + 36, align 256, addrspace 5)
-    ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 40
-    ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32)
-    ; CHECK: [[COPY10:%[0-9]+]]:_(p5) = COPY [[PTR_ADD12]](p5)
-    ; CHECK: G_STORE [[UV10]](s32), [[COPY10]](p5) :: (store 4 into %stack.0 + 40, align 256, addrspace 5)
-    ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 44
-    ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32)
-    ; CHECK: [[COPY11:%[0-9]+]]:_(p5) = COPY [[PTR_ADD13]](p5)
-    ; CHECK: G_STORE [[UV11]](s32), [[COPY11]](p5) :: (store 4 into %stack.0 + 44, align 256, addrspace 5)
-    ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
-    ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32)
-    ; CHECK: [[COPY12:%[0-9]+]]:_(p5) = COPY [[PTR_ADD14]](p5)
-    ; CHECK: G_STORE [[UV12]](s32), [[COPY12]](p5) :: (store 4 into %stack.0 + 48, align 256, addrspace 5)
-    ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 52
-    ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32)
-    ; CHECK: [[COPY13:%[0-9]+]]:_(p5) = COPY [[PTR_ADD15]](p5)
-    ; CHECK: G_STORE [[UV13]](s32), [[COPY13]](p5) :: (store 4 into %stack.0 + 52, align 256, addrspace 5)
-    ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 56
-    ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32)
-    ; CHECK: [[COPY14:%[0-9]+]]:_(p5) = COPY [[PTR_ADD16]](p5)
-    ; CHECK: G_STORE [[UV14]](s32), [[COPY14]](p5) :: (store 4 into %stack.0 + 56, align 256, addrspace 5)
-    ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 60
-    ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32)
-    ; CHECK: [[COPY15:%[0-9]+]]:_(p5) = COPY [[PTR_ADD17]](p5)
-    ; CHECK: G_STORE [[UV15]](s32), [[COPY15]](p5) :: (store 4 into %stack.0 + 60, align 256, addrspace 5)
-    ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 64
-    ; CHECK: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32)
-    ; CHECK: [[COPY16:%[0-9]+]]:_(p5) = COPY [[PTR_ADD18]](p5)
-    ; CHECK: G_STORE [[UV16]](s32), [[COPY16]](p5) :: (store 4 into %stack.0 + 64, align 256, addrspace 5)
-    ; CHECK: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 68
-    ; CHECK: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32)
-    ; CHECK: [[COPY17:%[0-9]+]]:_(p5) = COPY [[PTR_ADD19]](p5)
-    ; CHECK: G_STORE [[UV17]](s32), [[COPY17]](p5) :: (store 4 into %stack.0 + 68, align 256, addrspace 5)
-    ; CHECK: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 72
-    ; CHECK: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32)
-    ; CHECK: [[COPY18:%[0-9]+]]:_(p5) = COPY [[PTR_ADD20]](p5)
-    ; CHECK: G_STORE [[UV18]](s32), [[COPY18]](p5) :: (store 4 into %stack.0 + 72, align 256, addrspace 5)
-    ; CHECK: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
-    ; CHECK: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32)
-    ; CHECK: [[COPY19:%[0-9]+]]:_(p5) = COPY [[PTR_ADD21]](p5)
-    ; CHECK: G_STORE [[UV19]](s32), [[COPY19]](p5) :: (store 4 into %stack.0 + 76, align 256, addrspace 5)
-    ; CHECK: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 80
-    ; CHECK: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32)
-    ; CHECK: [[COPY20:%[0-9]+]]:_(p5) = COPY [[PTR_ADD22]](p5)
-    ; CHECK: G_STORE [[UV20]](s32), [[COPY20]](p5) :: (store 4 into %stack.0 + 80, align 256, addrspace 5)
-    ; CHECK: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 84
-    ; CHECK: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32)
-    ; CHECK: [[COPY21:%[0-9]+]]:_(p5) = COPY [[PTR_ADD23]](p5)
-    ; CHECK: G_STORE [[UV21]](s32), [[COPY21]](p5) :: (store 4 into %stack.0 + 84, align 256, addrspace 5)
-    ; CHECK: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 88
-    ; CHECK: [[PTR_ADD24:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32)
-    ; CHECK: [[COPY22:%[0-9]+]]:_(p5) = COPY [[PTR_ADD24]](p5)
-    ; CHECK: G_STORE [[UV22]](s32), [[COPY22]](p5) :: (store 4 into %stack.0 + 88, align 256, addrspace 5)
-    ; CHECK: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 92
-    ; CHECK: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32)
-    ; CHECK: [[COPY23:%[0-9]+]]:_(p5) = COPY [[PTR_ADD25]](p5)
-    ; CHECK: G_STORE [[UV23]](s32), [[COPY23]](p5) :: (store 4 into %stack.0 + 92, align 256, addrspace 5)
-    ; CHECK: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 96
-    ; CHECK: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32)
-    ; CHECK: [[COPY24:%[0-9]+]]:_(p5) = COPY [[PTR_ADD26]](p5)
-    ; CHECK: G_STORE [[UV24]](s32), [[COPY24]](p5) :: (store 4 into %stack.0 + 96, align 256, addrspace 5)
-    ; CHECK: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 100
-    ; CHECK: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32)
-    ; CHECK: [[COPY25:%[0-9]+]]:_(p5) = COPY [[PTR_ADD27]](p5)
-    ; CHECK: G_STORE [[UV25]](s32), [[COPY25]](p5) :: (store 4 into %stack.0 + 100, align 256, addrspace 5)
-    ; CHECK: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 104
-    ; CHECK: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32)
-    ; CHECK: [[COPY26:%[0-9]+]]:_(p5) = COPY [[PTR_ADD28]](p5)
-    ; CHECK: G_STORE [[UV26]](s32), [[COPY26]](p5) :: (store 4 into %stack.0 + 104, align 256, addrspace 5)
-    ; CHECK: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 108
-    ; CHECK: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32)
-    ; CHECK: [[COPY27:%[0-9]+]]:_(p5) = COPY [[PTR_ADD29]](p5)
-    ; CHECK: G_STORE [[UV27]](s32), [[COPY27]](p5) :: (store 4 into %stack.0 + 108, align 256, addrspace 5)
-    ; CHECK: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 112
-    ; CHECK: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32)
-    ; CHECK: [[COPY28:%[0-9]+]]:_(p5) = COPY [[PTR_ADD30]](p5)
-    ; CHECK: G_STORE [[UV28]](s32), [[COPY28]](p5) :: (store 4 into %stack.0 + 112, align 256, addrspace 5)
-    ; CHECK: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 116
-    ; CHECK: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32)
-    ; CHECK: [[COPY29:%[0-9]+]]:_(p5) = COPY [[PTR_ADD31]](p5)
-    ; CHECK: G_STORE [[UV29]](s32), [[COPY29]](p5) :: (store 4 into %stack.0 + 116, align 256, addrspace 5)
-    ; CHECK: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 120
-    ; CHECK: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32)
-    ; CHECK: [[COPY30:%[0-9]+]]:_(p5) = COPY [[PTR_ADD32]](p5)
-    ; CHECK: G_STORE [[UV30]](s32), [[COPY30]](p5) :: (store 4 into %stack.0 + 120, align 256, addrspace 5)
-    ; CHECK: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 124
-    ; CHECK: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32)
-    ; CHECK: [[COPY31:%[0-9]+]]:_(p5) = COPY [[PTR_ADD33]](p5)
-    ; CHECK: G_STORE [[UV31]](s32), [[COPY31]](p5) :: (store 4 into %stack.0 + 124, align 256, addrspace 5)
-    ; CHECK: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
-    ; CHECK: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32)
-    ; CHECK: [[COPY32:%[0-9]+]]:_(p5) = COPY [[PTR_ADD34]](p5)
-    ; CHECK: G_STORE [[UV32]](s32), [[COPY32]](p5) :: (store 4 into %stack.0 + 128, align 256, addrspace 5)
-    ; CHECK: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 132
-    ; CHECK: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32)
-    ; CHECK: [[COPY33:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5)
-    ; CHECK: G_STORE [[UV33]](s32), [[COPY33]](p5) :: (store 4 into %stack.0 + 132, align 256, addrspace 5)
-    ; CHECK: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 136
-    ; CHECK: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32)
-    ; CHECK: [[COPY34:%[0-9]+]]:_(p5) = COPY [[PTR_ADD36]](p5)
-    ; CHECK: G_STORE [[UV34]](s32), [[COPY34]](p5) :: (store 4 into %stack.0 + 136, align 256, addrspace 5)
-    ; CHECK: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 140
-    ; CHECK: [[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32)
-    ; CHECK: [[COPY35:%[0-9]+]]:_(p5) = COPY [[PTR_ADD37]](p5)
-    ; CHECK: G_STORE [[UV35]](s32), [[COPY35]](p5) :: (store 4 into %stack.0 + 140, align 256, addrspace 5)
-    ; CHECK: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 144
-    ; CHECK: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32)
-    ; CHECK: [[COPY36:%[0-9]+]]:_(p5) = COPY [[PTR_ADD38]](p5)
-    ; CHECK: G_STORE [[UV36]](s32), [[COPY36]](p5) :: (store 4 into %stack.0 + 144, align 256, addrspace 5)
-    ; CHECK: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 148
-    ; CHECK: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32)
-    ; CHECK: [[COPY37:%[0-9]+]]:_(p5) = COPY [[PTR_ADD39]](p5)
-    ; CHECK: G_STORE [[UV37]](s32), [[COPY37]](p5) :: (store 4 into %stack.0 + 148, align 256, addrspace 5)
-    ; CHECK: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 152
-    ; CHECK: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32)
-    ; CHECK: [[COPY38:%[0-9]+]]:_(p5) = COPY [[PTR_ADD40]](p5)
-    ; CHECK: G_STORE [[UV38]](s32), [[COPY38]](p5) :: (store 4 into %stack.0 + 152, align 256, addrspace 5)
-    ; CHECK: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 156
-    ; CHECK: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32)
-    ; CHECK: [[COPY39:%[0-9]+]]:_(p5) = COPY [[PTR_ADD41]](p5)
-    ; CHECK: G_STORE [[UV39]](s32), [[COPY39]](p5) :: (store 4 into %stack.0 + 156, align 256, addrspace 5)
-    ; CHECK: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 160
-    ; CHECK: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32)
-    ; CHECK: [[COPY40:%[0-9]+]]:_(p5) = COPY [[PTR_ADD42]](p5)
-    ; CHECK: G_STORE [[UV40]](s32), [[COPY40]](p5) :: (store 4 into %stack.0 + 160, align 256, addrspace 5)
-    ; CHECK: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 164
-    ; CHECK: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32)
-    ; CHECK: [[COPY41:%[0-9]+]]:_(p5) = COPY [[PTR_ADD43]](p5)
-    ; CHECK: G_STORE [[UV41]](s32), [[COPY41]](p5) :: (store 4 into %stack.0 + 164, align 256, addrspace 5)
-    ; CHECK: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 168
-    ; CHECK: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32)
-    ; CHECK: [[COPY42:%[0-9]+]]:_(p5) = COPY [[PTR_ADD44]](p5)
-    ; CHECK: G_STORE [[UV42]](s32), [[COPY42]](p5) :: (store 4 into %stack.0 + 168, align 256, addrspace 5)
-    ; CHECK: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 172
-    ; CHECK: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32)
-    ; CHECK: [[COPY43:%[0-9]+]]:_(p5) = COPY [[PTR_ADD45]](p5)
-    ; CHECK: G_STORE [[UV43]](s32), [[COPY43]](p5) :: (store 4 into %stack.0 + 172, align 256, addrspace 5)
-    ; CHECK: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 176
-    ; CHECK: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32)
-    ; CHECK: [[COPY44:%[0-9]+]]:_(p5) = COPY [[PTR_ADD46]](p5)
-    ; CHECK: G_STORE [[UV44]](s32), [[COPY44]](p5) :: (store 4 into %stack.0 + 176, align 256, addrspace 5)
-    ; CHECK: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 180
-    ; CHECK: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32)
-    ; CHECK: [[COPY45:%[0-9]+]]:_(p5) = COPY [[PTR_ADD47]](p5)
-    ; CHECK: G_STORE [[UV45]](s32), [[COPY45]](p5) :: (store 4 into %stack.0 + 180, align 256, addrspace 5)
-    ; CHECK: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 184
-    ; CHECK: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32)
-    ; CHECK: [[COPY46:%[0-9]+]]:_(p5) = COPY [[PTR_ADD48]](p5)
-    ; CHECK: G_STORE [[UV46]](s32), [[COPY46]](p5) :: (store 4 into %stack.0 + 184, align 256, addrspace 5)
-    ; CHECK: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 188
-    ; CHECK: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C50]](s32)
-    ; CHECK: [[COPY47:%[0-9]+]]:_(p5) = COPY [[PTR_ADD49]](p5)
-    ; CHECK: G_STORE [[UV47]](s32), [[COPY47]](p5) :: (store 4 into %stack.0 + 188, align 256, addrspace 5)
-    ; CHECK: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 192
-    ; CHECK: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32)
-    ; CHECK: [[COPY48:%[0-9]+]]:_(p5) = COPY [[PTR_ADD50]](p5)
-    ; CHECK: G_STORE [[UV48]](s32), [[COPY48]](p5) :: (store 4 into %stack.0 + 192, align 256, addrspace 5)
-    ; CHECK: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 196
-    ; CHECK: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32)
-    ; CHECK: [[COPY49:%[0-9]+]]:_(p5) = COPY [[PTR_ADD51]](p5)
-    ; CHECK: G_STORE [[UV49]](s32), [[COPY49]](p5) :: (store 4 into %stack.0 + 196, align 256, addrspace 5)
-    ; CHECK: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 200
-    ; CHECK: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32)
-    ; CHECK: [[COPY50:%[0-9]+]]:_(p5) = COPY [[PTR_ADD52]](p5)
-    ; CHECK: G_STORE [[UV50]](s32), [[COPY50]](p5) :: (store 4 into %stack.0 + 200, align 256, addrspace 5)
-    ; CHECK: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 204
-    ; CHECK: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32)
-    ; CHECK: [[COPY51:%[0-9]+]]:_(p5) = COPY [[PTR_ADD53]](p5)
-    ; CHECK: G_STORE [[UV51]](s32), [[COPY51]](p5) :: (store 4 into %stack.0 + 204, align 256, addrspace 5)
-    ; CHECK: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 208
-    ; CHECK: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32)
-    ; CHECK: [[COPY52:%[0-9]+]]:_(p5) = COPY [[PTR_ADD54]](p5)
-    ; CHECK: G_STORE [[UV52]](s32), [[COPY52]](p5) :: (store 4 into %stack.0 + 208, align 256, addrspace 5)
-    ; CHECK: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 212
-    ; CHECK: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32)
-    ; CHECK: [[COPY53:%[0-9]+]]:_(p5) = COPY [[PTR_ADD55]](p5)
-    ; CHECK: G_STORE [[UV53]](s32), [[COPY53]](p5) :: (store 4 into %stack.0 + 212, align 256, addrspace 5)
-    ; CHECK: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 216
-    ; CHECK: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32)
-    ; CHECK: [[COPY54:%[0-9]+]]:_(p5) = COPY [[PTR_ADD56]](p5)
-    ; CHECK: G_STORE [[UV54]](s32), [[COPY54]](p5) :: (store 4 into %stack.0 + 216, align 256, addrspace 5)
-    ; CHECK: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 220
-    ; CHECK: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32)
-    ; CHECK: [[COPY55:%[0-9]+]]:_(p5) = COPY [[PTR_ADD57]](p5)
-    ; CHECK: G_STORE [[UV55]](s32), [[COPY55]](p5) :: (store 4 into %stack.0 + 220, align 256, addrspace 5)
-    ; CHECK: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 224
-    ; CHECK: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32)
-    ; CHECK: [[COPY56:%[0-9]+]]:_(p5) = COPY [[PTR_ADD58]](p5)
-    ; CHECK: G_STORE [[UV56]](s32), [[COPY56]](p5) :: (store 4 into %stack.0 + 224, align 256, addrspace 5)
-    ; CHECK: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 228
-    ; CHECK: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32)
-    ; CHECK: [[COPY57:%[0-9]+]]:_(p5) = COPY [[PTR_ADD59]](p5)
-    ; CHECK: G_STORE [[UV57]](s32), [[COPY57]](p5) :: (store 4 into %stack.0 + 228, align 256, addrspace 5)
-    ; CHECK: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 232
-    ; CHECK: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32)
-    ; CHECK: [[COPY58:%[0-9]+]]:_(p5) = COPY [[PTR_ADD60]](p5)
-    ; CHECK: G_STORE [[UV58]](s32), [[COPY58]](p5) :: (store 4 into %stack.0 + 232, align 256, addrspace 5)
-    ; CHECK: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 236
-    ; CHECK: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32)
-    ; CHECK: [[COPY59:%[0-9]+]]:_(p5) = COPY [[PTR_ADD61]](p5)
-    ; CHECK: G_STORE [[UV59]](s32), [[COPY59]](p5) :: (store 4 into %stack.0 + 236, align 256, addrspace 5)
-    ; CHECK: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 240
-    ; CHECK: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32)
-    ; CHECK: [[COPY60:%[0-9]+]]:_(p5) = COPY [[PTR_ADD62]](p5)
-    ; CHECK: G_STORE [[UV60]](s32), [[COPY60]](p5) :: (store 4 into %stack.0 + 240, align 256, addrspace 5)
-    ; CHECK: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 244
-    ; CHECK: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32)
-    ; CHECK: [[COPY61:%[0-9]+]]:_(p5) = COPY [[PTR_ADD63]](p5)
-    ; CHECK: G_STORE [[UV61]](s32), [[COPY61]](p5) :: (store 4 into %stack.0 + 244, align 256, addrspace 5)
-    ; CHECK: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 248
-    ; CHECK: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32)
-    ; CHECK: [[COPY62:%[0-9]+]]:_(p5) = COPY [[PTR_ADD64]](p5)
-    ; CHECK: G_STORE [[UV62]](s32), [[COPY62]](p5) :: (store 4 into %stack.0 + 248, align 256, addrspace 5)
-    ; CHECK: [[C66:%[0-9]+]]:_(s32) = G_CONSTANT i32 252
-    ; CHECK: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C66]](s32)
-    ; CHECK: [[COPY63:%[0-9]+]]:_(p5) = COPY [[PTR_ADD65]](p5)
-    ; CHECK: G_STORE [[UV63]](s32), [[COPY63]](p5) :: (store 4 into %stack.0 + 252, align 256, addrspace 5)
-    ; CHECK: G_STORE [[C3]](s32), [[PTR_ADD35]](p5) :: (store 4 into %stack.0 + 132, addrspace 5)
-    ; CHECK: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (load 4 from %stack.0 + 132, align 256, addrspace 5)
-    ; CHECK: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (load 4 from %stack.0 + 136, align 256, addrspace 5)
-    ; CHECK: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load 4 from %stack.0 + 140, align 256, addrspace 5)
-    ; CHECK: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p5) :: (load 4 from %stack.0 + 144, align 256, addrspace 5)
-    ; CHECK: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p5) :: (load 4 from %stack.0 + 148, align 256, addrspace 5)
-    ; CHECK: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p5) :: (load 4 from %stack.0 + 152, align 256, addrspace 5)
-    ; CHECK: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p5) :: (load 4 from %stack.0 + 156, align 256, addrspace 5)
-    ; CHECK: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p5) :: (load 4 from %stack.0 + 160, align 256, addrspace 5)
-    ; CHECK: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p5) :: (load 4 from %stack.0 + 164, align 256, addrspace 5)
-    ; CHECK: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p5) :: (load 4 from %stack.0 + 168, align 256, addrspace 5)
-    ; CHECK: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p5) :: (load 4 from %stack.0 + 172, align 256, addrspace 5)
-    ; CHECK: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p5) :: (load 4 from %stack.0 + 176, align 256, addrspace 5)
-    ; CHECK: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p5) :: (load 4 from %stack.0 + 180, align 256, addrspace 5)
-    ; CHECK: [[LOAD17:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD15]](p5) :: (load 4 from %stack.0 + 184, align 256, addrspace 5)
-    ; CHECK: [[LOAD18:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD16]](p5) :: (load 4 from %stack.0 + 188, align 256, addrspace 5)
-    ; CHECK: [[LOAD19:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD17]](p5) :: (load 4 from %stack.0 + 192, align 256, addrspace 5)
-    ; CHECK: [[LOAD20:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD18]](p5) :: (load 4 from %stack.0 + 196, align 256, addrspace 5)
-    ; CHECK: [[LOAD21:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD19]](p5) :: (load 4 from %stack.0 + 200, align 256, addrspace 5)
-    ; CHECK: [[LOAD22:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD20]](p5) :: (load 4 from %stack.0 + 204, align 256, addrspace 5)
-    ; CHECK: [[LOAD23:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD21]](p5) :: (load 4 from %stack.0 + 208, align 256, addrspace 5)
-    ; CHECK: [[LOAD24:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD22]](p5) :: (load 4 from %stack.0 + 212, align 256, addrspace 5)
-    ; CHECK: [[LOAD25:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD23]](p5) :: (load 4 from %stack.0 + 216, align 256, addrspace 5)
-    ; CHECK: [[LOAD26:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD24]](p5) :: (load 4 from %stack.0 + 220, align 256, addrspace 5)
-    ; CHECK: [[LOAD27:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD25]](p5) :: (load 4 from %stack.0 + 224, align 256, addrspace 5)
-    ; CHECK: [[LOAD28:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD26]](p5) :: (load 4 from %stack.0 + 228, align 256, addrspace 5)
-    ; CHECK: [[LOAD29:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD27]](p5) :: (load 4 from %stack.0 + 232, align 256, addrspace 5)
-    ; CHECK: [[LOAD30:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD28]](p5) :: (load 4 from %stack.0 + 236, align 256, addrspace 5)
-    ; CHECK: [[LOAD31:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD29]](p5) :: (load 4 from %stack.0 + 240, align 256, addrspace 5)
-    ; CHECK: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD30]](p5) :: (load 4 from %stack.0 + 244, align 256, addrspace 5)
-    ; CHECK: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD31]](p5) :: (load 4 from %stack.0 + 248, align 256, addrspace 5)
-    ; CHECK: [[LOAD34:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD32]](p5) :: (load
4 from %stack.0 + 252, align 256, addrspace 5) - ; CHECK: [[LOAD35:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD33]](p5) :: (load 4 from %stack.0 + 256, align 256, addrspace 5) - ; CHECK: [[LOAD36:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD34]](p5) :: (load 4 from %stack.0 + 260, align 256, addrspace 5) - ; CHECK: [[COPY64:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK: [[LOAD37:%[0-9]+]]:_(s32) = G_LOAD [[COPY64]](p5) :: (load 4 from %stack.0 + 264, align 256, addrspace 5) - ; CHECK: [[LOAD38:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD36]](p5) :: (load 4 from %stack.0 + 268, align 256, addrspace 5) - ; CHECK: [[LOAD39:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD37]](p5) :: (load 4 from %stack.0 + 272, align 256, addrspace 5) - ; CHECK: [[LOAD40:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD38]](p5) :: (load 4 from %stack.0 + 276, align 256, addrspace 5) - ; CHECK: [[LOAD41:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD39]](p5) :: (load 4 from %stack.0 + 280, align 256, addrspace 5) - ; CHECK: [[LOAD42:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD40]](p5) :: (load 4 from %stack.0 + 284, align 256, addrspace 5) - ; CHECK: [[LOAD43:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD41]](p5) :: (load 4 from %stack.0 + 288, align 256, addrspace 5) - ; CHECK: [[LOAD44:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD42]](p5) :: (load 4 from %stack.0 + 292, align 256, addrspace 5) - ; CHECK: [[LOAD45:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD43]](p5) :: (load 4 from %stack.0 + 296, align 256, addrspace 5) - ; CHECK: [[LOAD46:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD44]](p5) :: (load 4 from %stack.0 + 300, align 256, addrspace 5) - ; CHECK: [[LOAD47:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD45]](p5) :: (load 4 from %stack.0 + 304, align 256, addrspace 5) - ; CHECK: [[LOAD48:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD46]](p5) :: (load 4 from %stack.0 + 308, align 256, addrspace 5) - ; CHECK: [[LOAD49:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD47]](p5) :: (load 4 from %stack.0 + 312, align 256, addrspace 5) - ; CHECK: [[LOAD50:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD48]](p5) :: (load 4 from %stack.0 + 316, align 256, addrspace 5) - ; CHECK: [[LOAD51:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD49]](p5) :: (load 4 from %stack.0 + 320, align 256, addrspace 5) - ; CHECK: [[LOAD52:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD50]](p5) :: (load 4 from %stack.0 + 324, align 256, addrspace 5) - ; CHECK: [[LOAD53:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD51]](p5) :: (load 4 from %stack.0 + 328, align 256, addrspace 5) - ; CHECK: [[LOAD54:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD52]](p5) :: (load 4 from %stack.0 + 332, align 256, addrspace 5) - ; CHECK: [[LOAD55:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD53]](p5) :: (load 4 from %stack.0 + 336, align 256, addrspace 5) - ; CHECK: [[LOAD56:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD54]](p5) :: (load 4 from %stack.0 + 340, align 256, addrspace 5) - ; CHECK: [[LOAD57:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD55]](p5) :: (load 4 from %stack.0 + 344, align 256, addrspace 5) - ; CHECK: [[LOAD58:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD56]](p5) :: (load 4 from %stack.0 + 348, align 256, addrspace 5) - ; CHECK: [[LOAD59:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD57]](p5) :: (load 4 from %stack.0 + 352, align 256, addrspace 5) - ; CHECK: [[LOAD60:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD58]](p5) :: (load 4 from %stack.0 + 356, align 256, addrspace 5) - ; CHECK: [[LOAD61:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD59]](p5) :: (load 4 from %stack.0 + 360, align 256, addrspace 5) - ; CHECK: [[LOAD62:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD60]](p5) :: (load 4 from %stack.0 + 364, align 256, addrspace 5) - ; CHECK: [[LOAD63:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD61]](p5) :: (load 4 from %stack.0 
+ 368, align 256, addrspace 5) - ; CHECK: [[LOAD64:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD62]](p5) :: (load 4 from %stack.0 + 372, align 256, addrspace 5) - ; CHECK: [[LOAD65:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD63]](p5) :: (load 4 from %stack.0 + 376, align 256, addrspace 5) - ; CHECK: [[LOAD66:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD64]](p5) :: (load 4 from %stack.0 + 380, align 256, addrspace 5) - ; CHECK: [[LOAD67:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD65]](p5) :: (load 4 from %stack.0 + 384, align 256, addrspace 5) - ; CHECK: [[COPY65:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD16]](s32), [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD32]](s32), [[LOAD33]](s32), [[LOAD34]](s32), [[LOAD35]](s32) - ; CHECK: [[BUILD_VECTOR8:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD36]](s32), [[LOAD37]](s32), [[LOAD38]](s32), [[LOAD39]](s32) - ; CHECK: [[BUILD_VECTOR9:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD40]](s32), [[LOAD41]](s32), [[LOAD42]](s32), [[LOAD43]](s32) - ; CHECK: [[BUILD_VECTOR10:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD44]](s32), [[LOAD45]](s32), [[LOAD46]](s32), [[LOAD47]](s32) - ; CHECK: [[BUILD_VECTOR11:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD48]](s32), [[LOAD49]](s32), [[LOAD50]](s32), [[LOAD51]](s32) - ; CHECK: [[BUILD_VECTOR12:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD52]](s32), [[LOAD53]](s32), [[LOAD54]](s32), [[LOAD55]](s32) - ; CHECK: [[BUILD_VECTOR13:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD56]](s32), [[LOAD57]](s32), [[LOAD58]](s32), [[LOAD59]](s32) - ; CHECK: [[BUILD_VECTOR14:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD60]](s32), [[LOAD61]](s32), [[LOAD62]](s32), [[LOAD63]](s32) - ; CHECK: [[BUILD_VECTOR15:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD64]](s32), [[LOAD65]](s32), [[LOAD66]](s32), [[LOAD67]](s32) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY65]](p1) :: (store 16, align 4, addrspace 1) - ; CHECK: [[C67:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD66:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C67]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD66]](p1) :: (store 16 + 16, align 4, addrspace 1) - ; CHECK: [[C68:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD67:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C68]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD67]](p1) :: (store 16 + 32, align 4, addrspace 1) - ; CHECK: [[C69:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD68:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C69]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD68]](p1) :: (store 16 + 48, align 4, addrspace 1) - ; CHECK: 
[[PTR_ADD69:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD69]](p1) :: (store 16 + 64, align 4, addrspace 1) - ; CHECK: [[C70:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK: [[PTR_ADD70:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C70]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD70]](p1) :: (store 16 + 80, align 4, addrspace 1) - ; CHECK: [[C71:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: [[PTR_ADD71:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C71]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD71]](p1) :: (store 16 + 96, align 4, addrspace 1) - ; CHECK: [[C72:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK: [[PTR_ADD72:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C72]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD72]](p1) :: (store 16 + 112, align 4, addrspace 1) - ; CHECK: [[PTR_ADD73:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C1]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR8]](<4 x s32>), [[PTR_ADD73]](p1) :: (store 16 + 128, align 4, addrspace 1) - ; CHECK: [[C73:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD74:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C73]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR9]](<4 x s32>), [[PTR_ADD74]](p1) :: (store 16 + 144, align 4, addrspace 1) - ; CHECK: [[C74:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 - ; CHECK: [[PTR_ADD75:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C74]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR10]](<4 x s32>), [[PTR_ADD75]](p1) :: (store 16 + 160, align 4, addrspace 1) - ; CHECK: [[C75:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 - ; CHECK: [[PTR_ADD76:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C75]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR11]](<4 x s32>), [[PTR_ADD76]](p1) :: (store 16 + 176, align 4, addrspace 1) - ; CHECK: [[PTR_ADD77:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR12]](<4 x s32>), [[PTR_ADD77]](p1) :: (store 16 + 192, align 4, addrspace 1) - ; CHECK: [[C76:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 - ; CHECK: [[PTR_ADD78:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C76]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR13]](<4 x s32>), [[PTR_ADD78]](p1) :: (store 16 + 208, align 4, addrspace 1) - ; CHECK: [[C77:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 - ; CHECK: [[PTR_ADD79:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C77]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR14]](<4 x s32>), [[PTR_ADD79]](p1) :: (store 16 + 224, align 4, addrspace 1) - ; CHECK: [[C78:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 - ; CHECK: [[PTR_ADD80:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY65]], [[C78]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR15]](<4 x s32>), [[PTR_ADD80]](p1) :: (store 16 + 240, align 4, addrspace 1) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32 + ; CHECK: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x 
s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>) + ; CHECK: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store 16 + 16, align 4, addrspace 1) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store 16 + 32, align 4, addrspace 1) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store 16 + 48, align 4, addrspace 1) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store 16 + 64, align 4, addrspace 1) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store 16 + 80, align 4, addrspace 1) + ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) + ; CHECK: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store 16 + 96, align 4, addrspace 1) + ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) + ; CHECK: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store 16 + 112, align 4, addrspace 1) + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store 16 + 128, align 4, addrspace 1) + ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) + ; CHECK: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store 16 + 144, align 4, addrspace 1) + ; CHECK: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) + ; CHECK: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store 16 + 160, align 4, addrspace 1) + ; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) + ; CHECK: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store 16 + 176, align 4, addrspace 1) + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store 16 + 192, align 4, addrspace 1) + ; CHECK: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) + ; CHECK: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store 16 + 208, align 4, addrspace 1) + ; CHECK: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 + ; CHECK: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) + ; CHECK: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store 16 + 224, align 4, addrspace 1) + ; CHECK: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 + ; CHECK: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) + ; CHECK: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store 16 + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load 256, align 4, addrspace 4) diff --git 
a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 3a9fb59cae6f03..bd54b6d7e35919 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -3051,4 +3051,85 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test fewer elements of G_INSERT_VECTOR_ELEMENT +TEST_F(AArch64GISelMITest, FewerElementsInsertVectorElt) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT P0{LLT::pointer(0, 64)}; + LLT S64{LLT::scalar(64)}; + LLT S16{LLT::scalar(16)}; + LLT V2S16{LLT::vector(2, 16)}; + LLT V3S16{LLT::vector(3, 16)}; + LLT V8S16{LLT::vector(8, 16)}; + + auto Ptr0 = B.buildIntToPtr(P0, Copies[0]); + auto VectorV8 = B.buildLoad(V8S16, Ptr0, MachinePointerInfo(), Align(8)); + auto Value = B.buildTrunc(S16, Copies[1]); + + auto Seven = B.buildConstant(S64, 7); + auto InsertV8Constant7_0 = + B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven); + auto InsertV8Constant7_1 = + B.buildInsertVectorElement(V8S16, VectorV8, Value, Seven); + + B.buildStore(InsertV8Constant7_0, Ptr0, MachinePointerInfo(), Align(8), + MachineMemOperand::MOVolatile); + B.buildStore(InsertV8Constant7_1, Ptr0, MachinePointerInfo(), Align(8), + MachineMemOperand::MOVolatile); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + B.setInsertPt(*EntryMBB, InsertV8Constant7_0->getIterator()); + + // This should index the high element of the 4th piece of an unmerge. + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*InsertV8Constant7_0, 0, V2S16)); + + // This case requires extracting an intermediate vector type into the target + // v4s16. 
+ B.setInsertPt(*EntryMBB, InsertV8Constant7_1->getIterator()); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*InsertV8Constant7_1, 0, V3S16)); + + const auto *CheckStr = R"( + CHECK: [[COPY0:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY + CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY + CHECK: [[PTR0:%[0-9]+]]:_(p0) = G_INTTOPTR [[COPY0]] + CHECK: [[VEC8:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR0]]:_(p0) :: (load 16, align 8) + CHECK: [[INSERT_VAL:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]] + + + CHECK: [[UNMERGE0:%[0-9]+]]:_(<2 x s16>), [[UNMERGE1:%[0-9]+]]:_(<2 x s16>), [[UNMERGE2:%[0-9]+]]:_(<2 x s16>), [[UNMERGE3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[VEC8]] + CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + CHECK: [[SUB_INSERT_7:%[0-9]+]]:_(<2 x s16>) = G_INSERT_VECTOR_ELT [[UNMERGE3]]:_, [[INSERT_VAL]]:_(s16), [[ONE]] + CHECK: [[INSERT_V8_7_0:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[UNMERGE0]]:_(<2 x s16>), [[UNMERGE1]]:_(<2 x s16>), [[UNMERGE2]]:_(<2 x s16>), [[SUB_INSERT_7]]:_(<2 x s16>) + + + CHECK: [[UNMERGE1_0:%[0-9]+]]:_(s16), [[UNMERGE1_1:%[0-9]+]]:_(s16), [[UNMERGE1_2:%[0-9]+]]:_(s16), [[UNMERGE1_3:%[0-9]+]]:_(s16), [[UNMERGE1_4:%[0-9]+]]:_(s16), [[UNMERGE1_5:%[0-9]+]]:_(s16), [[UNMERGE1_6:%[0-9]+]]:_(s16), [[UNMERGE1_7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[VEC8]]:_(<8 x s16>) + CHECK: [[IMPDEF_S16:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + CHECK: [[BUILD0:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_0]]:_(s16), [[UNMERGE1_1]]:_(s16), [[UNMERGE1_2]]:_(s16) + CHECK: [[BUILD1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_3]]:_(s16), [[UNMERGE1_4]]:_(s16), [[UNMERGE1_5]]:_(s16) + CHECK: [[BUILD2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UNMERGE1_6]]:_(s16), [[UNMERGE1_7]]:_(s16), [[IMPDEF_S16]]:_(s16) + CHECK: [[IMPDEF_V3S16:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + CHECK: [[ONE_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + CHECK: [[SUB_INSERT_7_V3S16:%[0-9]+]]:_(<3 x s16>) = G_INSERT_VECTOR_ELT [[BUILD2]]:_, [[INSERT_VAL]]:_(s16), [[ONE_1]] + CHECK: [[WIDE_CONCAT:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BUILD0]]:_(<3 x s16>), [[BUILD1]]:_(<3 x s16>), [[SUB_INSERT_7_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>), [[IMPDEF_V3S16]]:_(<3 x s16>) + CHECK: [[INSERT_V8_7_1:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[WIDE_CONCAT]]:_(<24 x s16>), 0 + + CHECK: G_STORE [[INSERT_V8_7_0]] + CHECK: G_STORE [[INSERT_V8_7_1]] + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace From 9887a70e7a768f6fca135587ce3e62d691a3646d Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Tue, 28 Jul 2020 01:35:18 -0400 Subject: [PATCH 072/101] [libc] Add ULP function to MPFRNumber class to test correctly rounded functions such as SQRT, FMA. Add ULP function to MPFRNumber class to test correctly rounded functions. 
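To make the tie-handling concrete (a worked example with IEEE-754 doubles, following the definitions in the code below): for value = 1.0 the eps exponent is 1023 - 1023 - 52 = -52, so eps(1.0) = 2^-52. If the high-precision MPFR result is 1.0 + 2^-53, then ULP = 2^-53 / 2^-52 = 0.5, exactly a tie; at a 0.5-ULP bound such a result is accepted only when the last mantissa bit of the libc value is even, which matches round-to-nearest-even.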
Differential Revision: https://reviews.llvm.org/D84725 --- libc/utils/MPFRWrapper/MPFRUtils.cpp | 100 +++++++++++++++++++++++---- libc/utils/MPFRWrapper/MPFRUtils.h | 22 ++++-- 2 files changed, 106 insertions(+), 16 deletions(-) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index c6020f471e88d8..c97e89ce9b2b46 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -9,6 +9,7 @@ #include "MPFRUtils.h" #include "utils/FPUtil/FPBits.h" +#include "utils/FPUtil/TestHelpers.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" @@ -119,6 +120,9 @@ class MPFRNumber { case Operation::Sin: mpfr_sin(value, mpfrInput.value, MPFR_RNDN); break; + case Operation::Sqrt: + mpfr_sqrt(value, mpfrInput.value, MPFR_RNDN); + break; case Operation::Trunc: mpfr_trunc(value, mpfrInput.value); break; @@ -155,9 +159,59 @@ class MPFRNumber { } // These functions are useful for debugging. - float asFloat() const { return mpfr_get_flt(value, MPFR_RNDN); } - double asDouble() const { return mpfr_get_d(value, MPFR_RNDN); } + template <typename T> T as() const; + + template <> float as<float>() const { return mpfr_get_flt(value, MPFR_RNDN); } + template <> double as<double>() const { return mpfr_get_d(value, MPFR_RNDN); } + template <> long double as<long double>() const { + return mpfr_get_ld(value, MPFR_RNDN); + } + void dump(const char *msg) const { mpfr_printf("%s%.128Rf\n", msg, value); } + + // Return the ULP (units-in-the-last-place) difference between the + // stored MPFR and a floating point number. + // + // We define: + // ULP(mpfr_value, value) = abs(mpfr_value - value) / eps(value) + // + // Remarks: + // 1. ULP < 0.5 will imply that the value is correctly rounded. + // 2. We expect that this value and the value to be compared (the [input] + // argument) are reasonably close, and we will provide an upper bound + // of ULP value for testing. Moreover, most of the fractional parts of + // ULP value do not matter much, so using double as the return type + // should be good enough. + template <typename T> + cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, double> ulp(T input) { + fputil::FPBits<T> bits(input); + MPFRNumber mpfrInput(input); + + // abs(value - input) + mpfr_sub(mpfrInput.value, value, mpfrInput.value, MPFR_RNDN); + mpfr_abs(mpfrInput.value, mpfrInput.value, MPFR_RNDN); + + // get eps(input) + int epsExponent = bits.exponent - fputil::FPBits<T>::exponentBias - + fputil::MantissaWidth<T>::value; + if (bits.exponent == 0) { + // correcting denormal exponent + ++epsExponent; + } else if ((bits.mantissa == 0) && (bits.exponent > 1) && + mpfr_less_p(value, mpfrInput.value)) { + // when the input is exactly 2^n, the distance (epsilon) between the input + // and the next floating point number is different from the distance to + // the previous floating point number. So in that case, if the correct + // value from MPFR is smaller than the input, we use the smaller epsilon. + --epsExponent; + } + + // Since eps(value) is of the form 2^e, instead of dividing by such a + // number, we multiply by its inverse 2^{-e}.
+ mpfr_mul_2si(mpfrInput.value, mpfrInput.value, -epsExponent, MPFR_RNDN); + + return mpfrInput.as<double>(); + } }; namespace internal { @@ -167,19 +221,26 @@ void MPFRMatcher<T>::explainError(testutils::StreamWrapper &OS) { MPFRNumber mpfrResult(operation, input); MPFRNumber mpfrInput(input); MPFRNumber mpfrMatchValue(matchValue); - MPFRNumber mpfrToleranceValue(matchValue, tolerance); FPBits<T> inputBits(input); FPBits<T> matchBits(matchValue); - // TODO: Call to llvm::utohexstr implicitly converts __uint128_t values to - // uint64_t values. This can be fixed using a custom wrapper for - // llvm::utohexstr to handle __uint128_t values correctly. + FPBits<T> mpfrResultBits(mpfrResult.as<T>()); OS << "Match value not within tolerance value of MPFR result:\n" - << " Input decimal: " << mpfrInput.str() << '\n' - << " Input bits: 0x" << llvm::utohexstr(inputBits.bitsAsUInt()) << '\n' - << " Match decimal: " << mpfrMatchValue.str() << '\n' - << " Match bits: 0x" << llvm::utohexstr(matchBits.bitsAsUInt()) << '\n' - << " MPFR result: " << mpfrResult.str() << '\n' - << "Tolerance value: " << mpfrToleranceValue.str() << '\n'; + << " Input decimal: " << mpfrInput.str() << '\n'; + __llvm_libc::fputil::testing::describeValue(" Input bits: ", input, OS); + OS << '\n' << " Match decimal: " << mpfrMatchValue.str() << '\n'; + __llvm_libc::fputil::testing::describeValue(" Match bits: ", matchValue, + OS); + OS << '\n' << " MPFR result: " << mpfrResult.str() << '\n'; + __llvm_libc::fputil::testing::describeValue( + " MPFR rounded: ", mpfrResult.as<T>(), OS); + OS << '\n'; + if (useULP) { + OS << " ULP error: " << std::to_string(mpfrResult.ulp(matchValue)) + << '\n'; + } else { + MPFRNumber mpfrToleranceValue = MPFRNumber(matchValue, tolerance); + OS << "Tolerance value: " << mpfrToleranceValue.str() << '\n'; + } } template void MPFRMatcher<float>::explainError(testutils::StreamWrapper &); @@ -201,6 +262,21 @@ template bool compare<double>(Operation, double, double, const Tolerance &); template bool compare<long double>(Operation, long double, long double, const Tolerance &); +template <typename T> +bool compare(Operation op, T input, T libcResult, double ulpError) { + // If the ulp error is exactly 0.5 (i.e., a tie), we would check that the result + // is rounded to the nearest even.
+ MPFRNumber mpfrResult(op, input); + double ulp = mpfrResult.ulp(libcResult); + bool bitsAreEven = ((FPBits<T>(libcResult).bitsAsUInt() & 1) == 0); + return (ulp < ulpError) || + ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); +} + +template bool compare<float>(Operation, float, float, double); +template bool compare<double>(Operation, double, double, double); +template bool compare<long double>(Operation, long double, long double, double); + } // namespace internal } // namespace mpfr diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 56281656533250..633c67ff8570fb 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -48,6 +48,7 @@ enum class Operation : int { Floor, Round, Sin, + Sqrt, Trunc }; @@ -56,6 +57,9 @@ namespace internal { template <typename T> bool compare(Operation op, T input, T libcOutput, const Tolerance &t); +template <typename T> +bool compare(Operation op, T input, T libcOutput, double t); + template <typename T> class MPFRMatcher : public testing::Matcher<T> { static_assert(__llvm_libc::cpp::IsFloatingPointType<T>::Value, "MPFRMatcher can only be used with floating point values."); @@ -64,14 +68,21 @@ template <typename T> class MPFRMatcher : public testing::Matcher<T> { T input; Tolerance tolerance; T matchValue; + double ulpTolerance; + bool useULP; public: MPFRMatcher(Operation op, T testInput, Tolerance &t) : operation(op), input(testInput), tolerance(t), useULP(false) {} + MPFRMatcher(Operation op, T testInput, double ulpTolerance) : operation(op), input(testInput), ulpTolerance(ulpTolerance), + useULP(true) {} bool match(T libcResult) { matchValue = libcResult; - return internal::compare(operation, input, libcResult, tolerance); + return (useULP + ? internal::compare(operation, input, libcResult, ulpTolerance) : internal::compare(operation, input, libcResult, tolerance)); } void explainError(testutils::StreamWrapper &OS) override; @@ -79,9 +90,12 @@ template <typename T> class MPFRMatcher : public testing::Matcher<T> { } // namespace internal -template <typename T> +template <typename T, typename U> __attribute__((no_sanitize("address"))) -internal::MPFRMatcher<T> getMPFRMatcher(Operation op, T input, Tolerance t) { +typename cpp::EnableIfType<cpp::IsSameV<U, Tolerance> || + cpp::IsSameV<U, double>, + internal::MPFRMatcher<T>> +getMPFRMatcher(Operation op, T input, U t) { static_assert( __llvm_libc::cpp::IsFloatingPointType<T>::Value, "getMPFRMatcher can only be used to match floating point results."); From f768eb216f5924219c845515c606ab0703825634 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Tue, 18 Aug 2020 11:04:58 -0700 Subject: [PATCH 073/101] [libc][obvious] Fix link order of math tests.
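This matters most likely because the helper libraries are linked as static archives, where a user must appear on the link line before its definer: MPFRUtils.cpp now calls into LibcFPTestHelpers (describeValue), so LibcFPTestHelpers has to be linked after libcMPFRWrapper rather than before it.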
--- libc/test/src/math/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index f5a1e0e51a715f..e73de54035642e 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -18,10 +18,10 @@ function(add_fp_unittest name) add_libc_unittest(${name} ${MATH_UNITTEST_UNPARSED_ARGUMENTS}) get_fq_target_name(${name} fq_target_name) - target_link_libraries(${fq_target_name} PRIVATE LibcFPTestHelpers) if(MATH_UNITTEST_NEED_MPFR) target_link_libraries(${fq_target_name} PRIVATE libcMPFRWrapper -lmpfr -lgmp) endif() + target_link_libraries(${fq_target_name} PRIVATE LibcFPTestHelpers) endfunction(add_fp_unittest) add_fp_unittest( From f29e6277ad6bcff36ed950dbf8effddc59ba9c28 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 18 Aug 2020 09:23:48 -0700 Subject: [PATCH 074/101] [GlobalISel][CallLowering] Don't tail call with non-forwarded explicit sret Similar to this commit: faf8065a99817bcb10e6f09b558fe3e0972c35ce Testcase is pretty much the same as test/CodeGen/AArch64/tailcall-explicit-sret.ll Except it uses i64 (since we don't handle the i1024 return values yet), and doesn't have indirect tail call testcases (because we can't translate those yet). Differential Revision: https://reviews.llvm.org/D86148 --- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 19 ++-- .../call-translator-tail-call-sret.ll | 100 ++++++++++++++++++ 2 files changed, 113 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index e443f603def6b3..cf1059c67b4a05 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -65,6 +65,12 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, std::function GetCalleeReg) const { CallLoweringInfo Info; const DataLayout &DL = MIRBuilder.getDataLayout(); + MachineFunction &MF = MIRBuilder.getMF(); + bool CanBeTailCalled = CB.isTailCall() && + isInTailCallPosition(CB, MF.getTarget()) && + (MF.getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that @@ -75,6 +81,12 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArgInfo OrigArg{ArgRegs[i], Arg->getType(), getAttributesForArgIdx(CB, i), i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CB); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. 
+ if (OrigArg.Flags[0].isSRet() && isa<Instruction>(&Arg)) + CanBeTailCalled = false; + Info.OrigArgs.push_back(OrigArg); ++i; } @@ -91,16 +103,11 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, if (!Info.OrigRet.Ty->isVoidTy()) setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB); - MachineFunction &MF = MIRBuilder.getMF(); Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees); Info.CallConv = CB.getCallingConv(); Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CB.isMustTailCall(); - Info.IsTailCall = - CB.isTailCall() && isInTailCallPosition(CB, MF.getTarget()) && - (MF.getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsString() != "true"); + Info.IsTailCall = CanBeTailCalled; Info.IsVarArg = CB.getFunctionType()->isVarArg(); return lowerCall(MIRBuilder, Info); } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll new file mode 100644 index 00000000000000..a9a93d1b7f7a93 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call-sret.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc < %s -mtriple arm64-apple-darwin -global-isel -stop-after=irtranslator -verify-machineinstrs | FileCheck %s + +; Check that we don't try to tail-call with a non-forwarded sret parameter. +declare void @test_explicit_sret(i64* sret) + +; Forwarded explicit sret pointer => we can tail call. +define void @can_tail_call_forwarded_explicit_sret_ptr(i64* sret %arg) { + ; CHECK-LABEL: name: can_tail_call_forwarded_explicit_sret_ptr + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK: $x8 = COPY [[COPY]](p0) + ; CHECK: TCRETURNdi @test_explicit_sret, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $x8 + tail call void @test_explicit_sret(i64* %arg) + ret void +} + +; Not marked as tail, so don't tail call.
+define void @test_call_explicit_sret(i64* sret %arg) { + ; CHECK-LABEL: name: test_call_explicit_sret + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x8 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[COPY]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + call void @test_explicit_sret(i64* %arg) + ret void +} + +define void @dont_tail_call_explicit_sret_alloca_unused() { + ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_unused + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %l = alloca i64, align 8 + tail call void @test_explicit_sret(i64* %l) + ret void +} + +define void @dont_tail_call_explicit_sret_alloca_dummyusers(i64* %ptr) { + ; CHECK-LABEL: name: dont_tail_call_explicit_sret_alloca_dummyusers + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load 8 from %ir.ptr) + ; CHECK: G_STORE [[LOAD]](s64), [[FRAME_INDEX]](p0) :: (store 8 into %ir.l) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %l = alloca i64, align 8 + %r = load i64, i64* %ptr, align 8 + store i64 %r, i64* %l, align 8 + tail call void @test_explicit_sret(i64* %l) + ret void +} + +define void @dont_tail_call_tailcall_explicit_sret_gep(i64* %ptr) { + ; CHECK-LABEL: name: dont_tail_call_tailcall_explicit_sret_gep + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $x0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[PTR_ADD]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: RET_ReallyLR + %ptr2 = getelementptr i64, i64* %ptr, i32 1 + tail call void @test_explicit_sret(i64* %ptr2) + ret void +} + +define i64 @dont_tail_call_sret_alloca_returned() { + ; CHECK-LABEL: name: dont_tail_call_sret_alloca_returned + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.l + ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK: $x8 = COPY [[FRAME_INDEX]](p0) + ; CHECK: BL @test_explicit_sret, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x8 + ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load 8 from %ir.l) + ; CHECK: $x0 = COPY [[LOAD]](s64) + ; CHECK: RET_ReallyLR implicit $x0 + %l = alloca i64, align 8 + 
tail call void @test_explicit_sret(i64* %l) + %r = load i64, i64* %l, align 8 + ret i64 %r +} From bf36e902953a4bf8ac0aae5a498445951fbc3882 Mon Sep 17 00:00:00 2001 From: Jessica Paquette Date: Tue, 18 Aug 2020 10:37:10 -0700 Subject: [PATCH 075/101] [GlobalISel][CallLowering] NFC: Unify flag-setting from CallBase + AttributeList It's annoying to have to maintain multiple, nearly identical chains of if statements which all set the same attributes. Add a helper function, `addFlagsUsingAttrFn` which performs the attribute setting. Then, use wrappers for that function in `lowerCall` and `setArgFlags`. (Note that the flag-setting code in `setArgFlags` was missing the returned attribute. There's no selection for this yet, so no test. It's an example of the kind of thing this lets us avoid, though.) Differential Revision: https://reviews.llvm.org/D86159 --- .../llvm/CodeGen/GlobalISel/CallLowering.h | 6 ++ llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 66 +++++++++---------- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 1eec08f5106220..ef93042f6690d8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -213,6 +213,12 @@ class CallLowering { ISD::ArgFlagsTy getAttributesForArgIdx(const CallBase &Call, unsigned ArgIdx) const; + /// Adds flags to \p Flags based off of the attributes in \p Attrs. + /// \p OpIdx is the index in \p Attrs to add flags from. + void addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const; + template void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index cf1059c67b4a05..49d101a81e933e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -30,34 +30,51 @@ using namespace llvm; void CallLowering::anchor() {} -ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, - unsigned ArgIdx) const { - ISD::ArgFlagsTy Flags; - if (Call.paramHasAttr(ArgIdx, Attribute::SExt)) +/// Helper function which updates \p Flags when \p AttrFn returns true. 
+static void +addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags, + const std::function<bool(Attribute::AttrKind)> &AttrFn) { + if (AttrFn(Attribute::SExt)) Flags.setSExt(); - if (Call.paramHasAttr(ArgIdx, Attribute::ZExt)) + if (AttrFn(Attribute::ZExt)) Flags.setZExt(); - if (Call.paramHasAttr(ArgIdx, Attribute::InReg)) + if (AttrFn(Attribute::InReg)) Flags.setInReg(); - if (Call.paramHasAttr(ArgIdx, Attribute::StructRet)) + if (AttrFn(Attribute::StructRet)) Flags.setSRet(); - if (Call.paramHasAttr(ArgIdx, Attribute::Nest)) + if (AttrFn(Attribute::Nest)) Flags.setNest(); - if (Call.paramHasAttr(ArgIdx, Attribute::ByVal)) + if (AttrFn(Attribute::ByVal)) Flags.setByVal(); - if (Call.paramHasAttr(ArgIdx, Attribute::Preallocated)) + if (AttrFn(Attribute::Preallocated)) Flags.setPreallocated(); - if (Call.paramHasAttr(ArgIdx, Attribute::InAlloca)) + if (AttrFn(Attribute::InAlloca)) Flags.setInAlloca(); - if (Call.paramHasAttr(ArgIdx, Attribute::Returned)) + if (AttrFn(Attribute::Returned)) Flags.setReturned(); - if (Call.paramHasAttr(ArgIdx, Attribute::SwiftSelf)) + if (AttrFn(Attribute::SwiftSelf)) Flags.setSwiftSelf(); - if (Call.paramHasAttr(ArgIdx, Attribute::SwiftError)) + if (AttrFn(Attribute::SwiftError)) Flags.setSwiftError(); +} + +ISD::ArgFlagsTy CallLowering::getAttributesForArgIdx(const CallBase &Call, unsigned ArgIdx) const { ISD::ArgFlagsTy Flags; + addFlagsUsingAttrFn(Flags, [&Call, &ArgIdx](Attribute::AttrKind Attr) { + return Call.paramHasAttr(ArgIdx, Attr); + }); return Flags; } +void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags, + const AttributeList &Attrs, + unsigned OpIdx) const { + addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) { + return Attrs.hasAttribute(OpIdx, Attr); + }); +} + bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB, ArrayRef<Register> ResRegs, ArrayRef<ArrayRef<Register>> ArgRegs, @@ -118,24 +135,7 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const FuncInfoTy &FuncInfo) const { auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); - if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Flags.setZExt(); - if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Flags.setSExt(); - if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Flags.setInReg(); - if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Flags.setSRet(); - if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Flags.setSwiftSelf(); - if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Flags.setSwiftError(); - if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Flags.setByVal(); - if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated)) - Flags.setPreallocated(); - if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Flags.setInAlloca(); + addArgFlagsFromAttributes(Flags, Attrs, OpIdx); if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); @@ -152,8 +152,6 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL)); Flags.setByValAlign(FrameAlign); } - if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Flags.setNest(); Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); } From 62dbbcf6d7c67b02fd540a5a1e55c494bf88adea Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 18 Aug 2020 19:03:40 +0000 Subject: [PATCH 076/101] Remove MLIREDSCInterface library which isn't used anywhere (NFC) Reviewed By: nicolasvasilache, ftynse Differential Revision: https://reviews.llvm.org/D85042 ---
mlir/lib/EDSC/CMakeLists.txt | 11 ----- mlir/lib/EDSC/CoreAPIs.cpp | 93 ------------------------------------ 2 files changed, 104 deletions(-) delete mode 100644 mlir/lib/EDSC/CoreAPIs.cpp diff --git a/mlir/lib/EDSC/CMakeLists.txt b/mlir/lib/EDSC/CMakeLists.txt index 6d56f263f2713a..33e1d8e80e2fc5 100644 --- a/mlir/lib/EDSC/CMakeLists.txt +++ b/mlir/lib/EDSC/CMakeLists.txt @@ -14,14 +14,3 @@ add_mlir_library(MLIREDSC MLIRSupport ) -add_mlir_library(MLIREDSCInterface - CoreAPIs.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/EDSC - - LINK_LIBS PUBLIC - MLIRIR - MLIRSupport - MLIRParser - ) diff --git a/mlir/lib/EDSC/CoreAPIs.cpp b/mlir/lib/EDSC/CoreAPIs.cpp deleted file mode 100644 index 55b7c2c77a0efe..00000000000000 --- a/mlir/lib/EDSC/CoreAPIs.cpp +++ /dev/null @@ -1,93 +0,0 @@ -//===- Types.cpp - Implementations of MLIR Core C APIs --------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir-c/Core.h" - -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/Attributes.h" -#include "mlir/IR/Function.h" -#include "mlir/IR/MLIRContext.h" -#include "mlir/IR/StandardTypes.h" -#include "mlir/IR/Types.h" -#include "mlir/Support/LLVM.h" - -#include "mlir/Parser.h" - -#include "llvm/ADT/StringSwitch.h" - -using namespace mlir; - -mlir_type_t makeMemRefType(mlir_context_t context, mlir_type_t elemType, - int64_list_t sizes) { - auto t = mlir::MemRefType::get( - ArrayRef(sizes.values, sizes.n), - mlir::Type::getFromOpaquePointer(elemType), - {mlir::AffineMap::getMultiDimIdentityMap( - sizes.n, reinterpret_cast(context))}, - 0); - return mlir_type_t{t.getAsOpaquePointer()}; -} - -mlir_type_t makeFunctionType(mlir_context_t context, mlir_type_list_t inputs, - mlir_type_list_t outputs) { - SmallVector ins(inputs.n), outs(outputs.n); - for (unsigned i = 0; i < inputs.n; ++i) { - ins[i] = mlir::Type::getFromOpaquePointer(inputs.types[i]); - } - for (unsigned i = 0; i < outputs.n; ++i) { - outs[i] = mlir::Type::getFromOpaquePointer(outputs.types[i]); - } - auto ft = mlir::FunctionType::get( - ins, outs, reinterpret_cast(context)); - return mlir_type_t{ft.getAsOpaquePointer()}; -} - -mlir_type_t makeIndexType(mlir_context_t context) { - auto *ctx = reinterpret_cast(context); - auto type = mlir::IndexType::get(ctx); - return mlir_type_t{type.getAsOpaquePointer()}; -} - -mlir_attr_t makeIntegerAttr(mlir_type_t type, int64_t value) { - auto ty = Type::getFromOpaquePointer(reinterpret_cast(type)); - auto attr = IntegerAttr::get(ty, value); - return mlir_attr_t{attr.getAsOpaquePointer()}; -} - -mlir_attr_t makeBoolAttr(mlir_context_t context, bool value) { - auto *ctx = reinterpret_cast(context); - auto attr = BoolAttr::get(value, ctx); - return mlir_attr_t{attr.getAsOpaquePointer()}; -} - -mlir_attr_t makeFloatAttr(mlir_context_t context, float value) { - auto *ctx = reinterpret_cast(context); - auto attr = FloatAttr::get(FloatType::getF32(ctx), APFloat(value)); - return mlir_attr_t{attr.getAsOpaquePointer()}; -} - -mlir_attr_t makeStringAttr(mlir_context_t context, const char *value) { - auto *ctx = reinterpret_cast(context); - auto attr = StringAttr::get(value, ctx); - return mlir_attr_t{attr.getAsOpaquePointer()}; -} - -unsigned getFunctionArity(mlir_func_t function) { - auto f = 
mlir::FuncOp::getFromOpaquePointer(function); - return f.getNumArguments(); -} - -mlir_type_t mlirParseType(const char *type, mlir_context_t context, - uint64_t *charsRead) { - auto *ctx = reinterpret_cast<mlir::MLIRContext *>(context); - size_t numRead = 0; - Type ty = parseType(type, ctx, numRead); - if (charsRead) - *charsRead = numRead; - return mlir_type_t{ty.getAsOpaquePointer()}; -} From 6b1f9f2bd4437910804d571284b7c5bb66eac250 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 18 Aug 2020 12:29:58 -0700 Subject: [PATCH 077/101] [X86] Don't call SemaBuiltinConstantArg from CheckX86BuiltinTileDuplicate if Argument is Type or Value Dependent. SemaBuiltinConstantArg has an early exit for that case that doesn't produce an error and doesn't update the APInt. We need to detect that case and not use the APInt value. While there, delete the signature of CheckX86BuiltinTileArgumentsRange that takes a single argument index to check. There's another version that takes an ArrayRef, and a single value is convertible to an ArrayRef. --- clang/include/clang/Sema/Sema.h | 1 - clang/lib/Sema/SemaChecking.cpp | 17 +++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 1d12551a8ad212..19d58b889ef75e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12192,7 +12192,6 @@ class Sema final { bool CheckX86BuiltinTileArguments(unsigned BuiltinID, CallExpr *TheCall); bool CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, ArrayRef<int> ArgNums); - bool CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, int ArgNum); bool CheckX86BuiltinTileDuplicate(CallExpr *TheCall, ArrayRef<int> ArgNums); bool CheckX86BuiltinTileRangeAndDuplicate(CallExpr *TheCall, ArrayRef<int> ArgNums); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 70d3a682fc7028..deceffdb0ba50c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3705,7 +3705,7 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, enum { TileRegLow = 0, TileRegHigh = 7 }; bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, - ArrayRef<int> ArgNums) { + ArrayRef<int> ArgNums) { for (int ArgNum : ArgNums) { if (SemaBuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh)) return true; @@ -3713,19 +3713,20 @@ bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, return false; } -bool Sema::CheckX86BuiltinTileArgumentsRange(CallExpr *TheCall, int ArgNum) { - return SemaBuiltinConstantArgRange(TheCall, ArgNum, TileRegLow, TileRegHigh); -} - bool Sema::CheckX86BuiltinTileDuplicate(CallExpr *TheCall, ArrayRef<int> ArgNums) { // Because the max number of tile registers is TileRegHigh + 1, we use // each bit to represent the usage of them in a bitset.
std::bitset<TileRegHigh + 1> ArgValues; for (int ArgNum : ArgNums) { - llvm::APSInt Arg; - SemaBuiltinConstantArg(TheCall, ArgNum, Arg); - int ArgExtValue = Arg.getExtValue(); + Expr *Arg = TheCall->getArg(ArgNum); + if (Arg->isTypeDependent() || Arg->isValueDependent()) + continue; + + llvm::APSInt Result; + if (SemaBuiltinConstantArg(TheCall, ArgNum, Result)) + return true; + int ArgExtValue = Result.getExtValue(); assert((ArgExtValue >= TileRegLow || ArgExtValue <= TileRegHigh) && "Incorrect tile register num."); if (ArgValues.test(ArgExtValue)) From 673dbe1b5eef09db39783c828a84f1213a47bad0 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 30 Jul 2020 17:32:39 -0700 Subject: [PATCH 078/101] [clang codegen] Use IR "align" attribute for static array arguments. Without the "align" attribute, marking the argument dereferenceable is basically useless. See also D80166. Fixes https://bugs.llvm.org/show_bug.cgi?id=46876 . Differential Revision: https://reviews.llvm.org/D84992 --- clang/lib/CodeGen/CGCall.cpp | 16 ++++++++++++---- clang/test/CodeGen/vla.c | 10 +++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 9d225b23e3c335..98ba1efc20de6f 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2520,6 +2520,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // bytes). if (ArrTy->getSizeModifier() == ArrayType::Static) { QualType ETy = ArrTy->getElementType(); + llvm::Align Alignment = + CGM.getNaturalTypeAlignment(ETy).getAsAlign(); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); uint64_t ArrSize = ArrTy->getSize().getZExtValue(); if (!ETy->isIncompleteType() && ETy->isConstantSizeType() && ArrSize) { @@ -2539,10 +2542,15 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // For C99 VLAs with the static keyword, we don't know the size so // we can't use the dereferenceable attribute, but in addrspace(0) // we know that it must be nonnull. - if (ArrTy->getSizeModifier() == VariableArrayType::Static && - !getContext().getTargetAddressSpace(ArrTy->getElementType()) && - !CGM.getCodeGenOpts().NullPointerIsValid) - AI->addAttr(llvm::Attribute::NonNull); + if (ArrTy->getSizeModifier() == VariableArrayType::Static) { + QualType ETy = ArrTy->getElementType(); + llvm::Align Alignment = + CGM.getNaturalTypeAlignment(ETy).getAsAlign(); + AI->addAttrs(llvm::AttrBuilder().addAlignmentAttr(Alignment)); + if (!getContext().getTargetAddressSpace(ETy) && + !CGM.getCodeGenOpts().NullPointerIsValid) + AI->addAttr(llvm::Attribute::NonNull); + } } // Set `align` attribute if any. diff --git a/clang/test/CodeGen/vla.c b/clang/test/CodeGen/vla.c index 16b82f4acc7d38..3142050149aaab 100644 --- a/clang/test/CodeGen/vla.c +++ b/clang/test/CodeGen/vla.c @@ -200,13 +200,13 @@ void test7(int a[b(0)]) { // Make sure we emit dereferenceable or nonnull when the static keyword is // provided. void test8(int a[static 3]) { } -// CHECK: define void @test8(i32* dereferenceable(12) %a) +// CHECK: define void @test8(i32* align 4 dereferenceable(12) %a) void test9(int n, int a[static n]) { } -// NULL-INVALID: define void @test9(i32 %n, i32* nonnull %a) -// NULL-VALID: define void @test9(i32 %n, i32* %a) +// NULL-INVALID: define void @test9(i32 %n, i32* nonnull align 4 %a) +// NULL-VALID: define void @test9(i32 %n, i32* align 4 %a) // Make sure a zero-sized static array extent is still required to be nonnull.
void test10(int a[static 0]) {} -// NULL-INVALID: define void @test10(i32* nonnull %a) -// NULL-VALID: define void @test10(i32* %a) +// NULL-INVALID: define void @test10(i32* nonnull align 4 %a) +// NULL-VALID: define void @test10(i32* align 4 %a) From bb18532399cf01c712e18f85ecb1cfb612d664ac Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 4 Aug 2020 14:57:16 -0700 Subject: [PATCH 079/101] [AArch64][SVE] Allow llvm.aarch64.sve.st2/3/4 with vectors of pointers. This isn't necessary for ACLE, but could be useful in other situations. And the change is simple. Differential Revision: https://reviews.llvm.org/D85251 --- .../Target/AArch64/AArch64ISelLowering.cpp | 15 ++++---- .../CodeGen/AArch64/sve-intrinsics-stores.ll | 38 +++++++++++++++++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2cd2f67171205f..b37947495a4329 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9462,16 +9462,17 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, /// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics. template <unsigned NumVecs> -static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info, - const CallInst &CI) { +static bool +setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, + AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) { Info.opc = ISD::INTRINSIC_VOID; // Retrieve EC from first vector argument. - const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType()); + const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType()); ElementCount EC = VT.getVectorElementCount(); #ifndef NDEBUG // Check the assumption that all input vectors are the same type. for (unsigned I = 0; I < NumVecs; ++I) - assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) && + assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) && "Invalid type."); #endif // memVT is `NumVecs * VT`.
@@ -9494,11 +9495,11 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { case Intrinsic::aarch64_sve_st2: - return setInfoSVEStN<2>(Info, I); + return setInfoSVEStN<2>(*this, DL, Info, I); case Intrinsic::aarch64_sve_st3: - return setInfoSVEStN<3>(Info, I); + return setInfoSVEStN<3>(*this, DL, Info, I); case Intrinsic::aarch64_sve_st4: - return setInfoSVEStN<4>(Info, I); + return setInfoSVEStN<4>(*this, DL, Info, I); case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index 92877233b2c9c1..f3fcddbaa2fcf0 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -108,6 +108,17 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %pred, i8** %addr) { +; CHECK-LABEL: st2d_ptr: +; CHECK: st2d { z0.d, z1.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st2.nxv2p0i8( %v0, + %v1, + %pred, + i8** %addr) + ret void +} + ; ; ST3B ; @@ -220,6 +231,18 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %pred, i8** %addr) { +; CHECK-LABEL: st3d_ptr: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st3.nxv2p0i8( %v0, + %v1, + %v2, + %pred, + i8** %addr) + ret void +} + ; ; ST4B ; @@ -340,6 +363,18 @@ define void @st4d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, i8** %addr) { +; CHECK-LABEL: st4d_ptr: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st4.nxv2p0i8( %v0, + %v1, + %v2, + %v3, + %pred, + i8** %addr) + ret void +} ; ; STNT1B ; @@ -508,6 +543,7 @@ declare void @llvm.aarch64.sve.st2.nxv8f16(, , , , bfloat*) declare void @llvm.aarch64.sve.st2.nxv4f32(, , , float*) declare void @llvm.aarch64.sve.st2.nxv2f64(, , , double*) +declare void @llvm.aarch64.sve.st2.nxv2p0i8(, , , i8** nocapture) declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , i8*) declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , i16*) @@ -517,6 +553,7 @@ declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , , bfloat*) declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , float*) declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , double*) +declare void @llvm.aarch64.sve.st3.nxv2p0i8(, , , , i8** nocapture) declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , i8*) declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , i16*) @@ -526,6 +563,7 @@ declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , , bfloat*) declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , float*) declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , double*) +declare void @llvm.aarch64.sve.st4.nxv2p0i8(, , , , , i8** nocapture) declare void @llvm.aarch64.sve.stnt1.nxv16i8(, , i8*) declare void @llvm.aarch64.sve.stnt1.nxv8i16(, , i16*) From be944c85f375c0faa36ee5c7ccbc79ff9a78a0d5 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 10 Aug 2020 12:23:03 -0700 Subject: [PATCH 080/101] [AArch64][SVE] Add patterns for integer mla/mls. We probably want to introduce pseudo-instructions at some point, like we have for binary operations, but this seems okay for now. One thing I'm not sure about is whether we should be doing this as a DAGCombine instead of directly pattern-matching it. I don't see any big downside to doing it this way, though. 
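As a rough sketch of what the new patterns catch (illustrative IR, not one of the committed tests; the function name is made up): a single-use multiply feeding an add or sub on scalable vectors should now select to one predicated multiply-accumulate instead of a separate mul and add/sub:

define <vscale x 4 x i32> @mla_sketch(<vscale x 4 x i32> %acc,
                                      <vscale x 4 x i32> %a,
                                      <vscale x 4 x i32> %b) {
  ; With these patterns this is expected to codegen to a single
  ;   mla z0.s, p0/m, z1.s, z2.s
  ; (z0 += z1 * z2 under an all-true predicate) rather than mul + add.
  %mul = mul <vscale x 4 x i32> %a, %b
  %add = add <vscale x 4 x i32> %acc, %mul
  ret <vscale x 4 x i32> %add
}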
Differential Revision: https://reviews.llvm.org/D85681 --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 12 +- .../CodeGen/AArch64/llvm-ir-to-intrinsic.ll | 24 +- llvm/test/CodeGen/AArch64/sve-gep.ll | 17 +- llvm/test/CodeGen/AArch64/sve-int-arith.ll | 207 ++++++++++++------ 5 files changed, 170 insertions(+), 99 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index ea4c6cab5c35d3..03c0bebd44b135 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -233,6 +233,11 @@ def setoeq_or_seteq : PatFrags<(ops node:$lhs, node:$rhs), def setone_or_setne : PatFrags<(ops node:$lhs, node:$rhs), [(setone node:$lhs, node:$rhs), (setne node:$lhs, node:$rhs)]>; +def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2), + (AArch64mul_p node:$pred, node:$src1, node:$src2), [{ + return N->hasOneUse(); +}]>; + let Predicates = [HasSVE] in { defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>; @@ -281,8 +286,8 @@ let Predicates = [HasSVE] in { defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>; defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>; - defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla>; - defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>; + defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", int_aarch64_sve_mla, add, AArch64mul_p_oneuse>; + defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls, sub, AArch64mul_p_oneuse>; // SVE predicated integer reductions. defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 5eb811b9c78eb5..65b0a8623b7d29 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -2518,7 +2518,8 @@ class sve_int_mlas_vvv_pred sz8_64, bits<1> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_mlas_vvv_pred opc, string asm, SDPatternOperator op> { +multiclass sve_int_mlas_vvv_pred opc, string asm, SDPatternOperator op, + SDPatternOperator outerop, SDPatternOperator mulop> { def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>; def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>; def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>; @@ -2528,6 +2529,15 @@ multiclass sve_int_mlas_vvv_pred opc, string asm, SDPatternOperator op> def : SVE_4_Op_Pat(NAME # _H)>; def : SVE_4_Op_Pat(NAME # _S)>; def : SVE_4_Op_Pat(NAME # _D)>; + + def : Pat<(outerop nxv16i8:$Op1, (mulop nxv16i1:$pred, nxv16i8:$Op2, nxv16i8:$Op3)), + (!cast(NAME # _B) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv8i16:$Op1, (mulop nxv8i1:$pred, nxv8i16:$Op2, nxv8i16:$Op3)), + (!cast(NAME # _H) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv4i32:$Op1, (mulop nxv4i1:$pred, nxv4i32:$Op2, nxv4i32:$Op3)), + (!cast(NAME # _S) $pred, $Op1, $Op2, $Op3)>; + def : Pat<(outerop nxv2i64:$Op1, (mulop nxv2i1:$pred, nxv2i64:$Op2, nxv2i64:$Op3)), + (!cast(NAME # _D) $pred, $Op1, $Op2, $Op3)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll index d43dcda36231aa..bcff9e056f18c9 100644 --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ 
b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -131,8 +131,7 @@ define @srem_i8( %a, %b) ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -151,8 +150,7 @@ define @srem_i16( %a, %b ; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -164,8 +162,7 @@ define @srem_i32( %a, %b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -177,8 +174,7 @@ define @srem_i64( %a, %b ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: ret %div = srem %a, %b ret %div @@ -315,8 +311,7 @@ define @urem_i8( %a, %b) ; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mul z1.b, p0/m, z1.b, z2.b -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -335,8 +330,7 @@ define @urem_i16( %a, %b ; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z2.h -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -348,8 +342,7 @@ define @urem_i32( %a, %b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mls z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: ret %div = urem %a, %b ret %div @@ -361,8 +354,7 @@ define @urem_i64( %a, %b ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mls z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: ret %div = urem %a, %b ret %div diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll index 4230a7fa287160..8f68a38e2cd20d 100644 --- a/llvm/test/CodeGen/AArch64/sve-gep.ll +++ b/llvm/test/CodeGen/AArch64/sve-gep.ll @@ -105,10 +105,11 @@ define *> @scalable_of_scalable_1( insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = getelementptr , * %base, %idx @@ -119,9 +120,10 @@ define *> @scalable_of_scalable_2( insertelement ( undef, i64 1, i32 0), zeroinitializer, zeroinitializer %d = getelementptr , *> %base, %idx @@ -135,8 +137,7 @@ define *> @scalable_of_scalable_3(, *> %base, %idx ret *> %d diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll index d70e817085500f..bcd94d2d019334 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | 
FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -5,200 +6,262 @@ ; WARN-NOT: warning define @add_i64( %a, %b) { -; CHECK-LABEL: add_i64 -; CHECK: add z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i32( %a, %b) { -; CHECK-LABEL: add_i32 -; CHECK: add z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i16( %a, %b) { -; CHECK-LABEL: add_i16 -; CHECK: add z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @add_i8( %a, %b) { -; CHECK-LABEL: add_i8 -; CHECK: add z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = add %a, %b ret %res } define @sub_i64( %a, %b) { -; CHECK-LABEL: sub_i64 -; CHECK: sub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i32( %a, %b) { -; CHECK-LABEL: sub_i32 -; CHECK: sub z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i16( %a, %b) { -; CHECK-LABEL: sub_i16 -; CHECK: sub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sub_i8( %a, %b) { -; CHECK-LABEL: sub_i8 -; CHECK: sub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = sub %a, %b ret %res } define @sqadd_i64( %a, %b) { -; CHECK-LABEL: sqadd_i64 -; CHECK: sqadd z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv2i64( %a, %b) ret %res } define @sqadd_i32( %a, %b) { -; CHECK-LABEL: sqadd_i32 -; CHECK: sqadd z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv4i32( %a, %b) ret %res } define @sqadd_i16( %a, %b) { -; CHECK-LABEL: sqadd_i16 -; CHECK: sqadd z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv8i16( %a, %b) ret %res } define @sqadd_i8( %a, %b) { -; CHECK-LABEL: sqadd_i8 -; CHECK: sqadd z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sqadd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.sadd.sat.nxv16i8( %a, %b) ret %res } define @sqsub_i64( %a, %b) { -; CHECK-LABEL: sqsub_i64 -; CHECK: sqsub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv2i64( %a, %b) ret %res } define @sqsub_i32( %a, %b) { -; CHECK-LABEL: sqsub_i32 -; CHECK: sqsub z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv4i32( %a, %b) ret %res } define 
@sqsub_i16( %a, %b) { -; CHECK-LABEL: sqsub_i16 -; CHECK: sqsub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv8i16( %a, %b) ret %res } define @sqsub_i8( %a, %b) { -; CHECK-LABEL: sqsub_i8 -; CHECK: sqsub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: sqsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.ssub.sat.nxv16i8( %a, %b) ret %res } define @uqadd_i64( %a, %b) { -; CHECK-LABEL: uqadd_i64 -; CHECK: uqadd z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv2i64( %a, %b) ret %res } define @uqadd_i32( %a, %b) { -; CHECK-LABEL: uqadd_i32 -; CHECK: uqadd z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv4i32( %a, %b) ret %res } define @uqadd_i16( %a, %b) { -; CHECK-LABEL: uqadd_i16 -; CHECK: uqadd z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv8i16( %a, %b) ret %res } define @uqadd_i8( %a, %b) { -; CHECK-LABEL: uqadd_i8 -; CHECK: uqadd z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: uqadd_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.uadd.sat.nxv16i8( %a, %b) ret %res } define @uqsub_i64( %a, %b) { -; CHECK-LABEL: uqsub_i64 -; CHECK: uqsub z0.d, z0.d, z1.d -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, z1.d +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv2i64( %a, %b) ret %res } define @uqsub_i32( %a, %b) { -; CHECK-LABEL: uqsub_i32 -; CHECK: uqsub z0.s, z0.s, z1.s -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, z1.s +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv4i32( %a, %b) ret %res } define @uqsub_i16( %a, %b) { -; CHECK-LABEL: uqsub_i16 -; CHECK: uqsub z0.h, z0.h, z1.h -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, z1.h +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv8i16( %a, %b) ret %res } define @uqsub_i8( %a, %b) { -; CHECK-LABEL: uqsub_i8 -; CHECK: uqsub z0.b, z0.b, z1.b -; CHECK-NEXT: ret +; CHECK-LABEL: uqsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.b, z0.b, z1.b +; CHECK-NEXT: ret %res = call @llvm.usub.sat.nxv16i8( %a, %b) ret %res } +define @mla_i8( %a, %b, %c) { +; CHECK-LABEL: mla_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mla z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = add %c, %prod + ret %res +} + +define @mla_i8_multiuse( %a, %b, %c, * %p) { +; CHECK-LABEL: mla_i8_multiuse: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b +; CHECK-NEXT: add z0.b, z2.b, z1.b +; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: ret + %prod = mul %a, %b + store %prod, * %p + %res = add %c, %prod + ret %res +} + +define @mls_i8( %a, %b, %c) { +; CHECK-LABEL: mls_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mls z2.b, p0/m, z0.b, z1.b +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: ret + %prod = mul %a, %b + %res = sub %c, %prod + ret %res +} + declare @llvm.sadd.sat.nxv16i8(, ) declare 
<vscale x 8 x i16> @llvm.sadd.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.sadd.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

From 45901ebd430566a1b4ed7ae954a1d6d1722f3276 Mon Sep 17 00:00:00 2001
From: Marius Brehler
Date: Tue, 18 Aug 2020 22:16:00 +0200
Subject: [PATCH 081/101] [mlir] Check libraries linked into standalone-opt

Adds a call to mlir_check_all_link_libraries() to check all libraries
linked into standalone-opt.
---
 mlir/examples/standalone/standalone-opt/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mlir/examples/standalone/standalone-opt/CMakeLists.txt b/mlir/examples/standalone/standalone-opt/CMakeLists.txt
index 854fd556ae868c..06bbb4712645a3 100644
--- a/mlir/examples/standalone/standalone-opt/CMakeLists.txt
+++ b/mlir/examples/standalone/standalone-opt/CMakeLists.txt
@@ -10,3 +10,5 @@ add_llvm_executable(standalone-opt standalone-opt.cpp)
 llvm_update_compile_flags(standalone-opt)
 target_link_libraries(standalone-opt PRIVATE ${LIBS})
+
+mlir_check_all_link_libraries(standalone-opt)

From 0b98a59fedb5e98661ca531d5ed20110bfdd7b2f Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Tue, 18 Aug 2020 13:44:29 -0400
Subject: [PATCH 082/101] [VectorCombine] add tests for vector loads; NFC

---
 .../test/Transforms/VectorCombine/X86/load.ll | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index 524f48332b7cb8..104c8c2d025faf 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -373,3 +373,29 @@ define <8 x i32> @casted_load_i32_insert_v8i32(<4 x i32>* align 4 dereferenceabl
   %r = insertelement <8 x i32> undef, i32 %s, i32 0
   ret <8 x i32> %r
 }
+
+; TODO: Should load v4f32.
+
+define <8 x float> @load_f32_insert_v8f32(float* align 16 dereferenceable(16) %p) {
+; CHECK-LABEL: @load_f32_insert_v8f32(
+; CHECK-NEXT:    [[S:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:    ret <8 x float> [[R]]
+;
+  %s = load float, float* %p, align 4
+  %r = insertelement <8 x float> undef, float %s, i32 0
+  ret <8 x float> %r
+}
+
+; TODO: Should load v4f32.
+
+define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p) {
+; CHECK-LABEL: @load_f32_insert_v2f32(
+; CHECK-NEXT:    [[S:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0
+; CHECK-NEXT:    ret <2 x float> [[R]]
+;
+  %s = load float, float* %p, align 4
+  %r = insertelement <2 x float> undef, float %s, i32 0
+  ret <2 x float> %r
+}

From 08748d15b8d696db9f894db38d74a212b8ab43e6 Mon Sep 17 00:00:00 2001
From: Greg Clayton
Date: Mon, 17 Aug 2020 17:26:50 -0700
Subject: [PATCH 083/101] Fix a check that was attempting to see if an object
 file was in memory.

Checking if an object file is in memory should use
ObjectFile::IsInMemory(), not test ObjectFile::BaseAddress().
ObjectFile::BaseAddress() is designed to be overridden by all classes and
is for mach-o, ELF and COFF plug-ins. They find the header base address
and return that as a section offset address. The default implementation of
ObjectFile::BaseAddress() does try to make an Address() from
ObjectFile::m_memory_addr, but I switched it to the correct function call.
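
(For illustration only: a minimal model of why the old predicate misfires.
`FakeObjectFile` and its members are hypothetical stand-ins, not LLDB's real
ObjectFile API; they only contrast the two queries being compared.)

  #include <cassert>

  struct FakeObjectFile {
    bool FromMemory = false;   // set when the object was read from a memory image
    bool HasHeaderAddr = true; // mach-o/ELF/COFF plug-ins usually find one
    // Stand-in for GetBaseAddress().IsValid(): also true for file-backed objects.
    bool BaseAddressIsValid() const { return HasHeaderAddr || FromMemory; }
    bool IsInMemory() const { return FromMemory; }
  };

  int main() {
    FakeObjectFile OnDiskDsym; // file-backed, never read from memory
    // The old check treated any object with a valid base address as in-memory.
    assert(OnDiskDsym.BaseAddressIsValid() && !OnDiskDsym.IsInMemory());
  }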
Differential Revision: https://reviews.llvm.org/D86122
---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index e7701c350ff518..babe5a3847274b 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -1628,7 +1628,7 @@ void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_,
   } else if (unified_section_sp) {
     if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
       // Check to see if the module was read from memory?
-      if (module_sp->GetObjectFile()->GetBaseAddress().IsValid()) {
+      if (module_sp->GetObjectFile()->IsInMemory()) {
         // We have a module that is in memory and needs to have its file
         // address adjusted. We need to do this because when we load a file
         // from memory, its addresses will be slid already, yet the addresses

From 84fffa67283139954b7764328966b5f766db1003 Mon Sep 17 00:00:00 2001
From: Zequan Wu
Date: Mon, 17 Aug 2020 15:25:08 -0700
Subject: [PATCH 084/101] [Coverage] Adjust skipped regions only if
 {Prev,Next}TokLoc is in the same file as regions' {start, end}Loc

Fix a bug where {Prev, Next}TokLoc is in a different file from the skipped
regions' {start, end}Loc.

Differential Revision: https://reviews.llvm.org/D86116
---
 clang/lib/CodeGen/CoverageMappingGen.cpp    | 24 ++++++++++++---------
 clang/test/CoverageMapping/Inputs/comment.h |  6 ++++++
 clang/test/CoverageMapping/comment.cpp      | 13 +++++++++++
 3 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/CoverageMapping/Inputs/comment.h
 create mode 100644 clang/test/CoverageMapping/comment.cpp

diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index e6e1b211193591..8277804d27c0ec 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -44,7 +44,8 @@ CoverageMappingModuleGen::setUpCoverageCallbacks(Preprocessor &PP) {
   PP.setTokenWatcher([CoverageInfo](clang::Token Tok) {
     // Update previous token location.
     CoverageInfo->PrevTokLoc = Tok.getLocation();
-    CoverageInfo->updateNextTokLoc(Tok.getLocation());
+    if (Tok.getKind() != clang::tok::eod)
+      CoverageInfo->updateNextTokLoc(Tok.getLocation());
   });
   return CoverageInfo;
 }
@@ -305,20 +306,24 @@ class CoverageMappingBuilder {
   /// non-comment token. If shrinking the skipped range would make it empty,
   /// this returns None.
   Optional<SpellingRegion> adjustSkippedRange(SourceManager &SM,
-                                              SpellingRegion SR,
+                                              SourceLocation LocStart,
+                                              SourceLocation LocEnd,
                                               SourceLocation PrevTokLoc,
                                               SourceLocation NextTokLoc) {
+    SpellingRegion SR{SM, LocStart, LocEnd};
     // If Range begin location is invalid, it's not a comment region.
if (PrevTokLoc.isInvalid()) return SR; unsigned PrevTokLine = SM.getSpellingLineNumber(PrevTokLoc); unsigned NextTokLine = SM.getSpellingLineNumber(NextTokLoc); SpellingRegion newSR(SR); - if (SR.LineStart == PrevTokLine) { + if (SM.isWrittenInSameFile(LocStart, PrevTokLoc) && + SR.LineStart == PrevTokLine) { newSR.LineStart = SR.LineStart + 1; newSR.ColumnStart = 1; } - if (SR.LineEnd == NextTokLine) { + if (SM.isWrittenInSameFile(LocEnd, NextTokLoc) && + SR.LineEnd == NextTokLine) { newSR.LineEnd = SR.LineEnd - 1; newSR.ColumnEnd = SR.ColumnStart + 1; } @@ -354,14 +359,13 @@ class CoverageMappingBuilder { auto CovFileID = getCoverageFileID(LocStart); if (!CovFileID) continue; - SpellingRegion SR{SM, LocStart, LocEnd}; - if (Optional res = - adjustSkippedRange(SM, SR, I.PrevTokLoc, I.NextTokLoc)) - SR = res.getValue(); - else + Optional SR = + adjustSkippedRange(SM, LocStart, LocEnd, I.PrevTokLoc, I.NextTokLoc); + if (!SR.hasValue()) continue; auto Region = CounterMappingRegion::makeSkipped( - *CovFileID, SR.LineStart, SR.ColumnStart, SR.LineEnd, SR.ColumnEnd); + *CovFileID, SR->LineStart, SR->ColumnStart, SR->LineEnd, + SR->ColumnEnd); // Make sure that we only collect the regions that are inside // the source code of this function. if (Region.LineStart >= FileLineRanges[*CovFileID].first && diff --git a/clang/test/CoverageMapping/Inputs/comment.h b/clang/test/CoverageMapping/Inputs/comment.h new file mode 100644 index 00000000000000..eec5833c2bd0bc --- /dev/null +++ b/clang/test/CoverageMapping/Inputs/comment.h @@ -0,0 +1,6 @@ + + + + + +x = 0; diff --git a/clang/test/CoverageMapping/comment.cpp b/clang/test/CoverageMapping/comment.cpp new file mode 100644 index 00000000000000..f8e4b4912e182f --- /dev/null +++ b/clang/test/CoverageMapping/comment.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s | FileCheck %s + +int f() { + int x = 0; +#include "Inputs/comment.h" /* + */ + return x; +} + +// CHECK: File 0, 3:9 -> 8:2 = #0 +// CHECK-NEXT: Expansion,File 0, 5:10 -> 5:28 = #0 +// CHECK-NEXT: Skipped,File 0, 6:1 -> 6:7 = 0 +// CHECK-NEXT: File 1, 1:1 -> 7:1 = #0 From 1870b52f0c0880ad9e40eb01344372c59dbc0fb1 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Tue, 18 Aug 2020 11:06:16 -0700 Subject: [PATCH 085/101] Recommit "PR44685: DebugInfo: Handle address-use-invalid type units referencing non-type units" Originally committed as be3ef93bf58aa5546c7baadfb21d43b75fbb4e24. Reverted by b4bffdbadfcceb3959aaf231c1542301944e5812 due to bot failures: http://green.lab.llvm.org/green/job/clang-stage1-cmake-RA-expensive/17380/testReport/junit/LLVM/DebugInfo_X86/addr_tu_to_non_tu_ll/ http://45.33.8.238/win/22216/step_11.txt MacOS failure due to testing Split DWARF which isn't compatible with MachO. Windows failure due to testing type units which aren't enabled on Windows. Fix both of these by applying an explicit x86 linux triple to the test. 
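
(A side note on the mechanics, as a deliberately simplified sketch: the fix
below extends AddressPool::resetUsedFlag() so that NonTypeUnitContext can
save and restore the pool's "used" flag RAII-style. `Pool` and
`ScopedUsedFlag` here are hypothetical stand-ins, not the in-tree classes.)

  #include <cassert>

  struct Pool {
    bool HasBeenUsed = false;
    bool hasBeenUsed() const { return HasBeenUsed; }
    void resetUsedFlag(bool Used = false) { HasBeenUsed = Used; }
  };

  // Mirrors the NonTypeUnitContext idiom: stash the outer unit's flag on
  // entry, give the nested unit a clean flag, restore the outer flag on exit.
  struct ScopedUsedFlag {
    Pool &P;
    bool Saved;
    ScopedUsedFlag(Pool &Pl) : P(Pl), Saved(Pl.hasBeenUsed()) { P.resetUsedFlag(); }
    ~ScopedUsedFlag() { P.resetUsedFlag(Saved); }
  };

  int main() {
    Pool P;
    P.resetUsedFlag(true);      // the outer unit has used the pool
    {
      ScopedUsedFlag Scope(P);
      assert(!P.hasBeenUsed()); // nested unit starts with a clean flag
    }
    assert(P.hasBeenUsed());    // outer state restored on exit
  }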
--- llvm/lib/CodeGen/AsmPrinter/AddressPool.h | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 1 + llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll | 89 ++++++++++++++++++++ 4 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h index f92cf72093ca03..f1edc6c330d51e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -48,7 +48,7 @@ class AddressPool { bool hasBeenUsed() const { return HasBeenUsed; } - void resetUsedFlag() { HasBeenUsed = false; } + void resetUsedFlag(bool HasBeenUsed = false) { this->HasBeenUsed = HasBeenUsed; } MCSymbol *getLabel() { return AddressTableBaseSym; } void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index f70eed32f0b532..cee72120accb79 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3305,14 +3305,14 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) : DD(DD), - TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) { + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) { DD->TypeUnitsUnderConstruction.clear(); - assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed()); + DD->AddrPool.resetUsedFlag(); } DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); - DD->AddrPool.resetUsedFlag(); + DD->AddrPool.resetUsedFlag(AddrPoolUsed); } DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0b943ebe46b669..93e08d1151ff70 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -648,6 +648,7 @@ class DwarfDebug : public DebugHandlerBase { class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; + bool AddrPoolUsed; friend class DwarfDebug; NonTypeUnitContext(DwarfDebug *DD); public: diff --git a/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll new file mode 100644 index 00000000000000..e81cb38c2131b0 --- /dev/null +++ b/llvm/test/DebugInfo/X86/addr-tu-to-non-tu.ll @@ -0,0 +1,89 @@ +; RUN: llc -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=x.dwo < %s \ +; RUN: | llvm-dwarfdump -debug-info -debug-types - \ +; RUN: | FileCheck --implicit-check-not=Unit --implicit-check-not=contents --implicit-check-not=declaration %s + +; Test that an address-using-with-Split-DWARF type unit that references a +; non-type unit is handled correctly. A NonTypeUnitContext is used to insulate +; the type construction from being discarded when the prior/outer type has to be +; discarded due to finding it used an address & so can't be type united under +; Split DWARF. + +; The intermediate types tu and t2 are here just to test a bit more +; thoroughly/broadly. They also demonstrate one slight limitation/sub-optimality +; since 't2' isn't put in a type unit. 
+ + +; extern int foo; +; namespace { +; struct t1 { +; }; +; } +; template struct t2 { +; t1 v1; +; }; +; struct t3 { +; t2<&foo> v1; +; }; +; t3 v1; + +; CHECK: .debug_info contents: +; CHECK: Compile Unit: + +; CHECK: .debug_info.dwo contents: +; CHECK: Compile Unit: + +; FIXME: In theory "t3" could be in a type unit - but at the moment, because it +; references t2, which needs an address, t3 gets non-type-united. +; But the same doesn't happen if t3 referenced an anonymous namespace type. + +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t3") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "t2<&foo>" +; CHECK: DW_TAG_namespace +; CHECK: [[T1:0x[0-9a-f]*]]: DW_TAG_structure_type +; CHECK: DW_AT_name ("t1") +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("t2<&foo>") +; CHECK: DW_TAG_member +; CHECK: DW_AT_name ("v1") +; CHECK: DW_AT_type ([[T1]] "t1") + +; CHECK: .debug_types contents: + +; CHECK-NOT: .debug_types.dwo contents: + + +%struct.t3 = type { %struct.t2 } +%struct.t2 = type { %"struct.(anonymous namespace)::t1" } +%"struct.(anonymous namespace)::t1" = type { i8 } + +@v1 = dso_local global %struct.t3 zeroinitializer, align 1, !dbg !0 +@foo = external dso_local global i32, align 4 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!18, !19, !20} +!llvm.ident = !{!21} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "v1", scope: !2, file: !3, line: 16, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "test.dwo", emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: GNU) +!3 = !DIFile(filename: "test.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 12, size: 8, flags: DIFlagTypePassByValue, elements: !7, identifier: "_ZTS2t3") +!7 = !{!8} +!8 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !6, file: !3, line: 13, baseType: !9, size: 8) +!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2<&foo>", file: !3, line: 8, size: 8, flags: DIFlagTypePassByValue, elements: !10, templateParams: !14, identifier: "_ZTS2t2IXadL_Z3fooEEE") +!10 = !{!11} +!11 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !9, file: !3, line: 9, baseType: !12, size: 8) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", scope: !13, file: !3, line: 4, size: 8, flags: DIFlagTypePassByValue, elements: !4) +!13 = !DINamespace(scope: null) +!14 = !{!15} +!15 = !DITemplateValueParameter(type: !16, value: i32* @foo) +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !{i32 7, !"Dwarf Version", i32 4} +!19 = !{i32 2, !"Debug Info Version", i32 3} +!20 = !{i32 1, !"wchar_size", i32 4} +!21 = !{!"clang version 12.0.0 (git@github.com:llvm/llvm-project.git be646ae2865371c7a4966797e88f355de5653e04)"} From 5ccac05d433cf8a46683acb5293fb43280d0f2ed Mon Sep 17 00:00:00 2001 From: MaheshRavishankar Date: Tue, 18 Aug 2020 13:26:29 -0700 Subject: [PATCH 086/101] [mlir][Linalg] Modify callback for getting id/nprocs in LinalgDistribution options to allow more general distributions. 
Changing the signature of the callback to send in the ranges for all the parallel loops and expect a vector with the Value to use for the processor-id and number-of-processors for each of the parallel loops. Differential Revision: https://reviews.llvm.org/D86095 --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 14 +- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 24 +++- .../Dialect/Linalg/tile-and-distribute.mlir | 136 +++++++++--------- .../lib/Transforms/TestLinalgTransforms.cpp | 21 ++- 4 files changed, 103 insertions(+), 92 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index 794ebcbc264516..beef1a70096e67 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -198,19 +198,23 @@ enum class DistributionMethod { }; /// Callback function type used to get processor ID, and number of processors -/// used for distribution. +/// used for distribution for all parallel loops generated. struct ProcInfo { Value procId; Value nprocs; }; -using ProcInfoCallBackFn = - std::function; +using ProcInfoCallBackFn = std::function( + OpBuilder &b, Location loc, ArrayRef parallelLoopRanges)>; /// Options that allow distribution of loops generated in Linalg transforms to /// processors while generating the loops. struct LinalgLoopDistributionOptions { - /// Callback function that returns the Value for processor ID, and number of - /// processors used to execute a given loop. + /// Callback function that returns the Values for processor ID (`procId`), and + /// number of processors (`nprocs`) used to execute the parallel loops. The + /// number of `{procId, nprocs}` pairs returned must be equal to the number of + /// `parallelLoopRanges` passed into the callback, which in-turn is same as + /// the number of parallel loops for which the `distributionMethod` is + /// specified below. ProcInfoCallBackFn procInfo; /// Specification of how to distribute the `scf.parallel` loops that are /// generated. 
As the `scf.parallel` loop is generated, the elements of this diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 4e9cbe9d913d11..cf14555aa63fc7 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -334,21 +334,31 @@ void GenerateLoopNest::doit( SmallVector distributionMethod; if (distributionOptions) { auto &options = distributionOptions.getValue(); - unsigned index = 0; OpBuilder &builder = edsc::ScopedContext::getBuilderRef(); Location loc = edsc::ScopedContext::getLocation(); distributionMethod.assign(distributionOptions->distributionMethod.begin(), distributionOptions->distributionMethod.end()); - for (auto iteratorType : enumerate(iteratorTypes)) - if (isParallelIteratorType(iteratorType.value()) && - index < distributionMethod.size()) { + SmallVector parallelLoopRanges; + for (auto iteratorType : enumerate(iteratorTypes)) { + if (isParallelIteratorType(iteratorType.value())) + parallelLoopRanges.push_back(loopRanges[iteratorType.index()]); + } + if (distributionMethod.size() < parallelLoopRanges.size()) + parallelLoopRanges.resize(distributionMethod.size()); + SmallVector procInfo = + options.procInfo(builder, loc, parallelLoopRanges); + unsigned index = 0; + for (auto iteratorType : enumerate(iteratorTypes)) { + if (index >= procInfo.size()) + break; + if (isParallelIteratorType(iteratorType.value())) { unsigned i = iteratorType.index(); - ProcInfo procInfo = options.procInfo(builder, loc, index); - updateBoundsForCyclicDistribution(builder, loc, procInfo.procId, - procInfo.nprocs, lbsStorage[i], + updateBoundsForCyclicDistribution(builder, loc, procInfo[index].procId, + procInfo[index].nprocs, lbsStorage[i], ubsStorage[i], stepsStorage[i]); index++; } + } } ValueRange lbs(lbsStorage), ubs(ubsStorage), steps(stepsStorage); generateParallelLoopNest(lbs, ubs, steps, iteratorTypes, bodyBuilderFn, ivs, diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir index e1bc28e133bde2..08f6d19fe6d6f0 100644 --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir @@ -11,16 +11,16 @@ func @gemm1(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T1:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T2:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} // CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[T3:.*]] = affine.apply #[[MAP0]]()[%[[T1]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T3]], %[[ARG3]]] -// CHECK: %[[T11:.*]] = affine.apply #[[MAP0]]()[%[[T2]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T11]]] -// CHECK: %[[T15:.*]] = affine.apply #[[MAP0]]()[%[[T1]]] -// CHECK: %[[T18:.*]] = affine.apply #[[MAP0]]()[%[[T2]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T15]], %[[T18]]] +// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV3:.*]] = subview 
%[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]] // CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]] // ----- @@ -36,22 +36,22 @@ func @gemm2(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T3:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T4:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T5:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK: %[[T6:.*]] = affine.apply #[[MAP0]]()[%[[T5]]] -// CHECK: %[[T7:.*]] = cmpi "slt", %[[T4]], %{{.*}} -// CHECK: %[[T8:.*]] = cmpi "slt", %[[T6]], %{{.*}} -// CHECK: %[[T9:.*]] = and %[[T7]], %[[T8]] -// CHECK: scf.if %[[T9]] +// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[INBOUNDSY:.*]] = cmpi "slt", %[[ITERY]], %{{.*}} +// CHECK: %[[INBOUNDSX:.*]] = cmpi "slt", %[[ITERX]], %{{.*}} +// CHECK: %[[INBOUNDS:.*]] = and %[[INBOUNDSY]], %[[INBOUNDSX]] +// CHECK: scf.if %[[INBOUNDS]] // CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[T10:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T10]], %[[ARG3]]] -// CHECK: %[[T18:.*]] = affine.apply #[[MAP0]]()[%[[T5]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T18]]] -// CHECK: %[[T22:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T25:.*]] = affine.apply #[[MAP0]]()[%[[T5]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T22]], %[[T25]]] +// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] // CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]] // ----- @@ -67,15 +67,15 @@ func @gemm3(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T3:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T4:.*]] = "gpu.grid_dim"() {dimension = "y"} -// CHECK: %[[T5:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T6:.*]] = affine.apply #[[MAP0]]()[%[[T4]]] -// CHECK: %[[T7:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK: %[[T8:.*]] = "gpu.grid_dim"() {dimension = "x"} -// CHECK: %[[T9:.*]] = affine.apply #[[MAP0]]()[%[[T7]]] -// CHECK: %[[T10:.*]] = affine.apply #[[MAP0]]()[%[[T8]]] -// CHECK: scf.parallel (%[[ARG3:.*]], %[[ARG4:.*]]) = (%[[T5]], %[[T9]]) to (%{{.*}}, %{{.*}}) step (%[[T6]], %[[T10]]) +// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"} +// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] +// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] +// CHECK: scf.parallel (%[[ARG3:.*]], 
%[[ARG4:.*]]) = (%[[LBY]], %[[LBX]]) to (%{{.*}}, %{{.*}}) step (%[[STEPY]], %[[STEPX]]) // CHECK: scf.for %[[ARG5:.*]] = // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG5]]] // CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG5]], %[[ARG4]]] @@ -95,19 +95,19 @@ func @gemm4(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T2:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T3:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK: %[[T4:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T5:.*]] = cmpi "slt", %[[T4]], %{{.*}} -// CHECK: scf.if %[[T5]] +// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[INBOUNDS:.*]] = cmpi "slt", %[[LBX]], %{{.*}} +// CHECK: scf.if %[[INBOUNDS]] // CHECK: scf.for %[[ARG3:.*]] = -// CHECK: %[[T6:.*]] = affine.apply #[[MAP0]]()[%[[T2]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T6]], %[[ARG3]]] -// CHECK: %[[T14:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[T14]]] -// CHECK: %[[T18:.*]] = affine.apply #[[MAP0]]()[%[[T2]]] -// CHECK: %[[T21:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T18]], %[[T21]]] +// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] +// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]] +// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]] // CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]] // ----- @@ -123,21 +123,21 @@ func @gemm5(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T3:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T4:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T5:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK: %[[T6:.*]] = "gpu.grid_dim"() {dimension = "x"} -// CHECK: %[[T7:.*]] = affine.apply #[[MAP0]]()[%[[T5]]] -// CHECK: %[[T8:.*]] = affine.apply #[[MAP0]]()[%[[T6]]] -// CHECK: %[[T9:.*]] = cmpi "slt", %[[T4]], %{{.*}} -// CHECK: scf.if %[[T9]] -// CHECK: scf.parallel (%[[ARG3.*]]) = (%[[T7]]) to (%{{.*}}) step (%[[T8]]) +// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] +// CHECK: %[[INBOUNDS:.*]] = cmpi "slt", %[[LBY]], %{{.*}} +// CHECK: scf.if %[[INBOUNDS]] +// CHECK: scf.parallel (%[[ARG3.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]]) // CHECK: scf.for %[[ARG4:.*]] = -// CHECK: %[[T10:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[T10]], %[[ARG4]]] +// CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]] // 
CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[ARG3]]] -// CHECK: %[[T21:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[T21]], %[[ARG3]]] +// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]] // CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]] // ----- @@ -153,16 +153,16 @@ func @gemm6(%a : memref, %b : memref, %c : memref) // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK: %[[T2:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK: %[[T3:.*]] = "gpu.grid_dim"() {dimension = "y"} -// CHECK: %[[T4:.*]] = affine.apply #[[MAP0]]()[%[[T2]]] -// CHECK: %[[T5:.*]] = affine.apply #[[MAP0]]()[%[[T3]]] -// CHECK: %[[T6:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK: scf.parallel (%[[ARG3.*]]) = (%[[T4]]) to (%{{.*}}) step (%[[T5]]) +// CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} +// CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"} +// CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] +// CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] +// CHECK: scf.parallel (%[[ARG3.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) // CHECK: scf.for %[[ARG4:.*]] = // CHECK: %[[SV1:.*]] = subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] -// CHECK: %[[T14:.*]] = affine.apply #[[MAP0]]()[%[[T6]]] -// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[T14]]] -// CHECK: %[[T20:.*]] = affine.apply #[[MAP0]]()[%[[T6]]] -// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[T20]]] +// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV2:.*]] = subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]] +// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] +// CHECK: %[[SV3:.*]] = subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]] // CHECK: linalg.matmul %[[SV1]], %[[SV2]], %[[SV3]] diff --git a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp index f6c1160d35b092..dffe4f2a0796a0 100644 --- a/mlir/test/lib/Transforms/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Transforms/TestLinalgTransforms.cpp @@ -289,19 +289,16 @@ static void fillPromotionCallBackPatterns(MLIRContext *ctx, } template -static ProcInfo getGpuProcIds(OpBuilder &b, Location loc, unsigned loopNum) { +static SmallVector +getGpuProcIds(OpBuilder &b, Location loc, + ArrayRef parallelLoopRanges) { Type indexType = b.getIndexType(); - switch (loopNum) { - case 0: - return {b.create(loc, indexType, b.getStringAttr("y")), - b.create(loc, indexType, b.getStringAttr("y"))}; - case 1: - return {b.create(loc, indexType, b.getStringAttr("x")), - b.create(loc, indexType, b.getStringAttr("x"))}; - default: - llvm_unreachable("test patterns handles only upto 2-level nested loops"); - } - return {nullptr, nullptr}; + SmallVector procInfo(2); + procInfo[0] = {b.create(loc, indexType, b.getStringAttr("y")), + b.create(loc, indexType, b.getStringAttr("y"))}; + procInfo[1] = {b.create(loc, indexType, b.getStringAttr("x")), + b.create(loc, indexType, b.getStringAttr("x"))}; + return procInfo; } static void fillTileAndDistributePatterns(MLIRContext *context, From e1de2b75501e5eaf8777bd5248382a7c55a44fd6 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Tue, 18 Aug 2020 20:01:19 +0000 Subject: [PATCH 087/101] Separate the Registration from Loading dialects in the Context This 
changes the behavior of constructing MLIRContext to no longer load
globally registered dialects on construction. Instead, Dialects are only
loaded explicitly on demand:

- the Parser is lazily loading Dialects in the context as it encounters
  them during parsing. This is the only purpose for registering dialects
  without loading them in the context.
- Passes are expected to declare the dialects they will create entities
  from (Operations, Attributes, or Types), and the PassManager is loading
  Dialects into the Context when starting a pipeline.

This change simplifies the configuration of the registration: a compiler
only needs to load the dialect for the IR it will emit, and the optimizer
is self-contained and loads the required Dialects. For example in the Toy
tutorial, the compiler only needs to load the Toy dialect in the Context;
all the others (linalg, affine, std, LLVM, ...) are loaded automatically
depending on the optimization pipeline enabled.

To adjust to this change, stop using the existing dialect registration:
the global registry will be removed soon.

1) For passes, you need to override the method:

virtual void getDependentDialects(DialectRegistry &registry) const {}

and register on the provided registry any dialect that this pass can
produce. Passes defined in TableGen can provide this list in the
dependentDialects list field.

2) For dialects, on construction you can register dependent dialects
using the provided MLIRContext: `context.getOrLoadDialect()`
This is useful if a dialect may canonicalize or have interfaces involving
another dialect.

3) For loading IR, dialects that can be in the input file must be
explicitly registered with the context. `MlirOptMain()` takes an explicit
registry for this purpose. See how the standalone-opt.cpp example is set
up:

mlir::DialectRegistry registry;
mlir::registerDialect();
mlir::registerDialect();

Only operations from these two dialects can be in the input file.
To include all of the dialects in MLIR Core, you can populate the registry this way: mlir::registerAllDialects(registry); 4) For `mlir-translate` callback, as well as frontend, Dialects can be loaded in the context before emitting the IR: context.getOrLoadDialect() --- .../standalone-opt/standalone-opt.cpp | 11 ++- mlir/examples/toy/Ch2/toyc.cpp | 7 +- mlir/examples/toy/Ch3/toyc.cpp | 6 +- mlir/examples/toy/Ch4/toyc.cpp | 6 +- .../toy/Ch5/mlir/LowerToAffineLoops.cpp | 3 + mlir/examples/toy/Ch5/toyc.cpp | 6 +- .../toy/Ch6/mlir/LowerToAffineLoops.cpp | 3 + mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp | 3 + mlir/examples/toy/Ch6/toyc.cpp | 6 +- .../toy/Ch7/mlir/LowerToAffineLoops.cpp | 3 + mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp | 3 + mlir/examples/toy/Ch7/toyc.cpp | 6 +- mlir/include/mlir-c/IR.h | 6 ++ mlir/include/mlir/Conversion/Passes.td | 26 ++++++ mlir/include/mlir/Dialect/Affine/Passes.td | 1 + .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 1 + .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 5 ++ .../include/mlir/Dialect/LLVMIR/NVVMDialect.h | 1 + mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 1 + .../mlir/Dialect/LLVMIR/ROCDLDialect.h | 1 + mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 1 + mlir/include/mlir/Dialect/Linalg/Passes.td | 8 ++ mlir/include/mlir/Dialect/SCF/Passes.td | 1 + mlir/include/mlir/IR/Dialect.h | 87 ++++++++++++++++--- mlir/include/mlir/IR/FunctionSupport.h | 4 +- mlir/include/mlir/IR/MLIRContext.h | 67 +++++++++++--- mlir/include/mlir/IR/OpBase.td | 5 ++ mlir/include/mlir/InitAllDialects.h | 47 +++++----- mlir/include/mlir/InitAllTranslations.h | 4 +- mlir/include/mlir/Pass/Pass.h | 8 ++ mlir/include/mlir/Pass/PassBase.td | 3 + mlir/include/mlir/Pass/PassManager.h | 14 +++ mlir/include/mlir/Support/MlirOptMain.h | 20 ++++- mlir/include/mlir/TableGen/Dialect.h | 8 +- mlir/include/mlir/TableGen/Pass.h | 4 + mlir/include/mlir/Transforms/Passes.td | 2 + mlir/lib/CAPI/IR/IR.cpp | 9 +- ...ConvertGPULaunchFuncToVulkanLaunchFunc.cpp | 1 + .../Conversion/LinalgToLLVM/LinalgToLLVM.cpp | 1 + mlir/lib/Conversion/PassDetail.h | 32 +++++++ .../StandardToLLVM/StandardToLLVM.cpp | 2 +- .../LegalizeStandardForSPIRV.cpp | 1 + .../Dialect/Affine/Transforms/PassDetail.h | 10 +++ mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 1 + .../Dialect/Linalg/Transforms/PassDetail.h | 9 ++ mlir/lib/Dialect/SCF/Transforms/PassDetail.h | 5 ++ mlir/lib/Dialect/SDBM/SDBMExpr.cpp | 2 +- mlir/lib/ExecutionEngine/JitRunner.cpp | 4 +- mlir/lib/IR/Dialect.cpp | 36 +++++--- mlir/lib/IR/MLIRContext.cpp | 87 ++++++++++++------- mlir/lib/IR/Operation.cpp | 2 +- mlir/lib/IR/Verifier.cpp | 4 +- mlir/lib/Parser/AttributeParser.cpp | 10 ++- mlir/lib/Parser/DialectSymbolParser.cpp | 7 +- mlir/lib/Parser/Parser.cpp | 41 ++++++--- mlir/lib/Pass/Pass.cpp | 27 ++++++ mlir/lib/Pass/PassDetail.h | 4 + mlir/lib/Support/MlirOptMain.cpp | 48 +++++----- mlir/lib/TableGen/Dialect.cpp | 8 ++ mlir/lib/TableGen/Pass.cpp | 5 ++ mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 1 + mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 5 +- mlir/lib/Transforms/PassDetail.h | 7 ++ mlir/test/CAPI/ir.c | 1 + mlir/test/EDSC/builder-api-test.cpp | 18 ++-- mlir/test/SDBM/sdbm-api-test.cpp | 9 +- .../Dialect/Affine/TestVectorizationUtils.cpp | 4 + .../lib/Dialect/SPIRV/TestAvailability.cpp | 2 +- mlir/test/lib/Dialect/Test/TestDialect.cpp | 4 + mlir/test/lib/Dialect/Test/TestDialect.h | 2 + mlir/test/lib/Dialect/Test/TestPatterns.cpp | 4 + .../lib/Transforms/TestAllReduceLowering.cpp | 4 + .../lib/Transforms/TestBufferPlacement.cpp | 4 + 
.../lib/Transforms/TestGpuMemoryPromotion.cpp | 7 ++ .../lib/Transforms/TestLinalgHoisting.cpp | 4 + .../lib/Transforms/TestLinalgTransforms.cpp | 11 +++ .../lib/Transforms/TestVectorTransforms.cpp | 8 ++ mlir/test/mlir-opt/commandline.mlir | 2 +- .../mlir-linalg-ods-gen.cpp | 2 +- mlir/tools/mlir-opt/mlir-opt.cpp | 8 +- mlir/tools/mlir-tblgen/DialectGen.cpp | 20 ++++- mlir/tools/mlir-tblgen/PassGen.cpp | 21 ++++- mlir/tools/mlir-translate/mlir-translate.cpp | 3 +- .../Dialect/Quant/QuantizationUtilsTest.cpp | 15 ++-- .../Dialect/SPIRV/DeserializationTest.cpp | 3 +- .../Dialect/SPIRV/SerializationTest.cpp | 5 +- mlir/unittests/IR/AttributeTest.cpp | 32 +++---- mlir/unittests/IR/DialectTest.cpp | 6 +- mlir/unittests/IR/OperationSupportTest.cpp | 8 +- mlir/unittests/Pass/AnalysisManagerTest.cpp | 8 +- mlir/unittests/SDBM/SDBMTest.cpp | 7 +- mlir/unittests/TableGen/OpBuildGen.cpp | 9 +- mlir/unittests/TableGen/StructsGenTest.cpp | 2 +- 93 files changed, 759 insertions(+), 231 deletions(-) diff --git a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp index b33dab26a71367..2dfb859ebd0526 100644 --- a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp +++ b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp @@ -24,9 +24,16 @@ int main(int argc, char **argv) { mlir::registerAllDialects(); mlir::registerAllPasses(); + // TODO: Register standalone passes here. + mlir::DialectRegistry registry; mlir::registerDialect(); - // TODO: Register standalone passes here. + mlir::registerDialect(); + // Add the following to include *all* MLIR Core dialects, or selectively + // include what you need like above. You only need to register dialects that + // will be *parsed* by the tool, not the one generated + // registerAllDialects(registry); - return failed(mlir::MlirOptMain(argc, argv, "Standalone optimizer driver\n")); + return failed( + mlir::MlirOptMain(argc, argv, "Standalone optimizer driver\n", registry)); } diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp index d0880ce0971b6e..99232d8f24a4a5 100644 --- a/mlir/examples/toy/Ch2/toyc.cpp +++ b/mlir/examples/toy/Ch2/toyc.cpp @@ -68,10 +68,9 @@ std::unique_ptr parseInputFile(llvm::StringRef filename) { } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); - - mlir::MLIRContext context; + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. + context.getOrLoadDialect(); // Handle '.toy' input to the compiler. if (inputType != InputType::MLIR && diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp index f9d5631719e8b6..d0430ce16e54a8 100644 --- a/mlir/examples/toy/Ch3/toyc.cpp +++ b/mlir/examples/toy/Ch3/toyc.cpp @@ -102,10 +102,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. 
+ context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch4/toyc.cpp b/mlir/examples/toy/Ch4/toyc.cpp index e11f35c5f7e10c..9f95887d270738 100644 --- a/mlir/examples/toy/Ch4/toyc.cpp +++ b/mlir/examples/toy/Ch4/toyc.cpp @@ -103,10 +103,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. + context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp index 3097681ea3fad3..92fd246a135886 100644 --- a/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp @@ -256,6 +256,9 @@ struct TransposeOpLowering : public ConversionPattern { namespace { struct ToyToAffineLoweringPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() final; }; } // end anonymous namespace. diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp index ed0496957093bb..16faac02fc60d0 100644 --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -106,10 +106,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context, } int dumpMLIR() { - // Register our Dialect with MLIR. - mlir::registerDialect(); + mlir::MLIRContext context(/*loadAllDialects=*/false); + // Load our Dialect in this MLIR Context. + context.getOrLoadDialect(); - mlir::MLIRContext context; mlir::OwningModuleRef module; llvm::SourceMgr sourceMgr; mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp index cac3415f48d68f..f3857f35e25c95 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp @@ -255,6 +255,9 @@ struct TransposeOpLowering : public ConversionPattern { namespace { struct ToyToAffineLoweringPass : public PassWrapper { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnFunction() final; }; } // end anonymous namespace. diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp index 74b32dc0ca1102..19bf27e1864d18 100644 --- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp +++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp @@ -159,6 +159,9 @@ class PrintOpLowering : public ConversionPattern { namespace { struct ToyToLLVMLoweringPass : public PassWrapper> { + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } void runOnOperation() final; }; } // end anonymous namespace diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp index bdcdf1af7ea831..9504a38b8784c9 100644 --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -255,10 +255,10 @@ int main(int argc, char **argv) { // If we aren't dumping the AST, then we are compiling with/to MLIR. - // Register our Dialect with MLIR. 
diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp
index ed0496957093bb..16faac02fc60d0 100644
--- a/mlir/examples/toy/Ch5/toyc.cpp
+++ b/mlir/examples/toy/Ch5/toyc.cpp
@@ -106,10 +106,10 @@ int loadMLIR(llvm::SourceMgr &sourceMgr, mlir::MLIRContext &context,
 }
 
 int dumpMLIR() {
-  // Register our Dialect with MLIR.
-  mlir::registerDialect<mlir::toy::ToyDialect>();
+  mlir::MLIRContext context(/*loadAllDialects=*/false);
+  // Load our Dialect in this MLIR Context.
+  context.getOrLoadDialect<mlir::toy::ToyDialect>();
 
-  mlir::MLIRContext context;
   mlir::OwningModuleRef module;
   llvm::SourceMgr sourceMgr;
   mlir::SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context);
diff --git a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
index cac3415f48d68f..f3857f35e25c95 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp
@@ -255,6 +255,9 @@ struct TransposeOpLowering : public ConversionPattern {
 namespace {
 struct ToyToAffineLoweringPass
     : public PassWrapper<ToyToAffineLoweringPass, FunctionPass> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<AffineDialect, StandardOpsDialect>();
+  }
   void runOnFunction() final;
 };
 } // end anonymous namespace.
diff --git a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
index 74b32dc0ca1102..19bf27e1864d18 100644
--- a/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp
@@ -159,6 +159,9 @@ class PrintOpLowering : public ConversionPattern {
 namespace {
 struct ToyToLLVMLoweringPass
     : public PassWrapper<ToyToLLVMLoweringPass, OperationPass<ModuleOp>> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<LLVM::LLVMDialect, scf::SCFDialect>();
+  }
   void runOnOperation() final;
 };
 } // end anonymous namespace
diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp
index bdcdf1af7ea831..9504a38b8784c9 100644
--- a/mlir/examples/toy/Ch6/toyc.cpp
+++ b/mlir/examples/toy/Ch6/toyc.cpp
@@ -255,10 +255,10 @@ int main(int argc, char **argv) {
   // If we aren't dumping the AST, then we are compiling with/to MLIR.
 
-  // Register our Dialect with MLIR.
-  mlir::registerDialect<mlir::toy::ToyDialect>();
+  mlir::MLIRContext context(/*loadAllDialects=*/false);
+  // Load our Dialect in this MLIR Context.
+  context.getOrLoadDialect<mlir::toy::ToyDialect>();
 
-  mlir::MLIRContext context;
   mlir::OwningModuleRef module;
   if (int error = loadAndProcessMLIR(context, module))
     return error;
diff --git a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
index 3097681ea3fad3..92fd246a135886 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp
@@ -256,6 +256,9 @@ struct TransposeOpLowering : public ConversionPattern {
 namespace {
 struct ToyToAffineLoweringPass
     : public PassWrapper<ToyToAffineLoweringPass, FunctionPass> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<AffineDialect, StandardOpsDialect>();
+  }
   void runOnFunction() final;
 };
 } // end anonymous namespace.
diff --git a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
index 74b32dc0ca1102..19bf27e1864d18 100644
--- a/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
+++ b/mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp
@@ -159,6 +159,9 @@ class PrintOpLowering : public ConversionPattern {
 namespace {
 struct ToyToLLVMLoweringPass
     : public PassWrapper<ToyToLLVMLoweringPass, OperationPass<ModuleOp>> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<LLVM::LLVMDialect, scf::SCFDialect>();
+  }
   void runOnOperation() final;
 };
 } // end anonymous namespace
diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp
index c1cc207a406ce2..cb3b455dc7ecbe 100644
--- a/mlir/examples/toy/Ch7/toyc.cpp
+++ b/mlir/examples/toy/Ch7/toyc.cpp
@@ -256,10 +256,10 @@ int main(int argc, char **argv) {
   // If we aren't dumping the AST, then we are compiling with/to MLIR.
 
-  // Register our Dialect with MLIR.
-  mlir::registerDialect<mlir::toy::ToyDialect>();
+  mlir::MLIRContext context(/*loadAllDialects=*/false);
+  // Load our Dialect in this MLIR Context.
+  context.getOrLoadDialect<mlir::toy::ToyDialect>();
 
-  mlir::MLIRContext context;
   mlir::OwningModuleRef module;
   if (int error = loadAndProcessMLIR(context, module))
     return error;
diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h
index 68546bf35625a2..f0c421bd5cce9b 100644
--- a/mlir/include/mlir-c/IR.h
+++ b/mlir/include/mlir-c/IR.h
@@ -88,6 +88,12 @@ MlirContext mlirContextCreate();
 /** Takes an MLIR context owned by the caller and destroys it. */
 void mlirContextDestroy(MlirContext context);
 
+/** Loads all the globally registered dialects in the provided context.
+ * TODO: remove the concept of globally registered dialects by exposing the
+ * DialectRegistry.
+ */
+void mlirContextLoadAllDialects(MlirContext context);
+
 /*============================================================================*/
 /* Location API.                                                              */
 /*============================================================================*/
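A quick sketch of how a client would exercise this transitional entry point; it
uses only the functions declared in the header above and is valid C as well as
C++:

    #include "mlir-c/IR.h"

    int main(void) {
      MlirContext ctx = mlirContextCreate();
      // Transitional: make every globally registered dialect available to this
      // context, mirroring loadAllGloballyRegisteredDialects() on the C++ side.
      mlirContextLoadAllDialects(ctx);
      mlirContextDestroy(ctx);
      return 0;
    }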
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 4ff23d71a5c0bf..0a043c01e98140 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -66,6 +66,11 @@ def ConvertAffineToStandard : Pass<"lower-affine"> {
     `affine.apply`.
   }];
   let constructor = "mlir::createLowerAffinePass()";
+  let dependentDialects = [
+    "scf::SCFDialect",
+    "StandardOpsDialect",
+    "vector::VectorDialect"
+  ];
 }
 
 //===----------------------------------------------------------------------===//
@@ -76,6 +81,7 @@ def ConvertAVX512ToLLVM : Pass<"convert-avx512-to-llvm", "ModuleOp"> {
   let summary = "Convert the operations from the avx512 dialect into the LLVM "
                 "dialect";
   let constructor = "mlir::createConvertAVX512ToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect", "LLVM::LLVMAVX512Dialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -98,6 +104,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> {
 def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
   let summary = "Generate NVVM operations for gpu operations";
   let constructor = "mlir::createLowerGpuOpsToNVVMOpsPass()";
+  let dependentDialects = ["NVVM::NVVMDialect"];
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
@@ -112,6 +119,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
   let summary = "Generate ROCDL operations for gpu operations";
   let constructor = "mlir::createLowerGpuOpsToROCDLOpsPass()";
+  let dependentDialects = ["ROCDL::ROCDLDialect"];
   let options = [
     Option<"indexBitwidth", "index-bitwidth", "unsigned",
            /*default=kDeriveIndexBitwidthFromDataLayout*/"0",
@@ -126,6 +134,7 @@ def ConvertGpuOpsToROCDLOps : Pass<"convert-gpu-to-rocdl", "gpu::GPUModuleOp"> {
 def ConvertGPUToSPIRV : Pass<"convert-gpu-to-spirv", "ModuleOp"> {
   let summary = "Convert GPU dialect to SPIR-V dialect";
   let constructor = "mlir::createConvertGPUToSPIRVPass()";
+  let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -136,6 +145,7 @@ def ConvertGpuLaunchFuncToVulkanLaunchFunc
     : Pass<"convert-gpu-launch-to-vulkan-launch", "ModuleOp"> {
   let summary = "Convert gpu.launch_func to vulkanLaunch external call";
   let constructor = "mlir::createConvertGpuLaunchFuncToVulkanLaunchFuncPass()";
+  let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
 def ConvertVulkanLaunchFuncToVulkanCalls
@@ -143,6 +153,7 @@ def ConvertVulkanLaunchFuncToVulkanCalls
   let summary = "Convert vulkanLaunch external call to Vulkan runtime external "
                 "calls";
   let constructor = "mlir::createConvertVulkanLaunchFuncToVulkanCallsPass()";
+  let dependentDialects = ["LLVM::LLVMDialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -153,6 +164,7 @@ def ConvertLinalgToLLVM : Pass<"convert-linalg-to-llvm", "ModuleOp"> {
   let summary = "Convert the operations from the linalg dialect into the LLVM "
                 "dialect";
   let constructor = "mlir::createConvertLinalgToLLVMPass()";
+  let dependentDialects = ["scf::SCFDialect", "LLVM::LLVMDialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -163,6 +175,7 @@ def ConvertLinalgToStandard :
     Pass<"convert-linalg-to-std", "ModuleOp"> {
   let summary = "Convert the operations from the linalg dialect into the "
                 "Standard dialect";
   let constructor = "mlir::createConvertLinalgToStandardPass()";
+  let dependentDialects = ["StandardOpsDialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -172,6 +185,7 @@ def ConvertLinalgToStandard :
Pass<"convert-linalg-to-std", "ModuleOp"> { def ConvertLinalgToSPIRV : Pass<"convert-linalg-to-spirv", "ModuleOp"> { let summary = "Convert Linalg ops to SPIR-V ops"; let constructor = "mlir::createLinalgToSPIRVPass()"; + let dependentDialects = ["spirv::SPIRVDialect"]; } //===----------------------------------------------------------------------===// @@ -182,6 +196,7 @@ def SCFToStandard : Pass<"convert-scf-to-std"> { let summary = "Convert SCF dialect to Standard dialect, replacing structured" " control flow with a CFG"; let constructor = "mlir::createLowerToCFGPass()"; + let dependentDialects = ["StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -191,6 +206,7 @@ def SCFToStandard : Pass<"convert-scf-to-std"> { def ConvertAffineForToGPU : FunctionPass<"convert-affine-for-to-gpu"> { let summary = "Convert top-level AffineFor Ops to GPU kernels"; let constructor = "mlir::createAffineForToGPUPass()"; + let dependentDialects = ["gpu::GPUDialect"]; let options = [ Option<"numBlockDims", "gpu-block-dims", "unsigned", /*default=*/"1u", "Number of GPU block dimensions for mapping">, @@ -202,6 +218,7 @@ def ConvertAffineForToGPU : FunctionPass<"convert-affine-for-to-gpu"> { def ConvertParallelLoopToGpu : Pass<"convert-parallel-loops-to-gpu"> { let summary = "Convert mapped scf.parallel ops to gpu launch operations"; let constructor = "mlir::createParallelLoopToGpuPass()"; + let dependentDialects = ["AffineDialect", "gpu::GPUDialect"]; } //===----------------------------------------------------------------------===// @@ -212,6 +229,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { let summary = "Convert operations from the shape dialect into the standard " "dialect"; let constructor = "mlir::createConvertShapeToStandardPass()"; + let dependentDialects = ["StandardOpsDialect"]; } //===----------------------------------------------------------------------===// @@ -221,6 +239,7 @@ def ConvertShapeToStandard : Pass<"convert-shape-to-std", "ModuleOp"> { def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { let summary = "Convert operations from the shape dialect to the SCF dialect"; let constructor = "mlir::createConvertShapeToSCFPass()"; + let dependentDialects = ["scf::SCFDialect"]; } //===----------------------------------------------------------------------===// @@ -230,6 +249,7 @@ def ConvertShapeToSCF : FunctionPass<"convert-shape-to-scf"> { def ConvertSPIRVToLLVM : Pass<"convert-spirv-to-llvm", "ModuleOp"> { let summary = "Convert SPIR-V dialect to LLVM dialect"; let constructor = "mlir::createConvertSPIRVToLLVMPass()"; + let dependentDialects = ["LLVM::LLVMDialect"]; } //===----------------------------------------------------------------------===// @@ -264,6 +284,7 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> { LLVM IR types. 
   }];
   let constructor = "mlir::createLowerToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect"];
   let options = [
     Option<"useAlignedAlloc", "use-aligned-alloc", "bool", /*default=*/"false",
            "Use aligned_alloc in place of malloc for heap allocations">,
@@ -291,11 +312,13 @@ def ConvertStandardToLLVM : Pass<"convert-std-to-llvm", "ModuleOp"> {
 def LegalizeStandardForSPIRV : Pass<"legalize-std-for-spirv"> {
   let summary = "Legalize standard ops for SPIR-V lowering";
   let constructor = "mlir::createLegalizeStdOpsForSPIRVLoweringPass()";
+  let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
 def ConvertStandardToSPIRV : Pass<"convert-std-to-spirv", "ModuleOp"> {
   let summary = "Convert Standard Ops to SPIR-V dialect";
   let constructor = "mlir::createConvertStandardToSPIRVPass()";
+  let dependentDialects = ["spirv::SPIRVDialect"];
 }
 
 //===----------------------------------------------------------------------===//
@@ -306,6 +329,7 @@ def ConvertVectorToSCF : FunctionPass<"convert-vector-to-scf"> {
   let summary = "Lower the operations from the vector dialect into the SCF "
                 "dialect";
   let constructor = "mlir::createConvertVectorToSCFPass()";
+  let dependentDialects = ["AffineDialect", "scf::SCFDialect"];
   let options = [
     Option<"fullUnroll", "full-unroll", "bool", /*default=*/"false",
            "Perform full unrolling when converting vector transfers to SCF">,
@@ -320,6 +344,7 @@ def ConvertVectorToLLVM : Pass<"convert-vector-to-llvm", "ModuleOp"> {
   let summary = "Lower the operations from the vector dialect into the LLVM "
                 "dialect";
   let constructor = "mlir::createConvertVectorToLLVMPass()";
+  let dependentDialects = ["LLVM::LLVMDialect"];
   let options = [
     Option<"reassociateFPReductions", "reassociate-fp-reductions", "bool",
            /*default=*/"false",
@@ -335,6 +360,7 @@ def ConvertVectorToROCDL : Pass<"convert-vector-to-rocdl", "ModuleOp"> {
   let summary = "Lower the operations from the vector dialect into the ROCDL "
                 "dialect";
   let constructor = "mlir::createConvertVectorToROCDLPass()";
+  let dependentDialects = ["ROCDL::ROCDLDialect"];
 }
 
 #endif // MLIR_CONVERSION_PASSES
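The PassGen.cpp change in the diffstat suggests that each dependentDialects
entry is expanded into a generated getDependentDialects override. As a rough
sketch (the exact emitted code is an assumption based on this patch, not
verbatim tblgen output), an entry such as ["LLVM::LLVMDialect"] would expand to
approximately:

    // Assumed shape of the generated hook for a pass declaring
    // LLVM::LLVMDialect as a dependent dialect.
    void getDependentDialects(::mlir::DialectRegistry &registry) const override {
      registry.insert<LLVM::LLVMDialect>();
    }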
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 810640058155fb..f43fabd19aaefe 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -94,6 +94,7 @@ def AffineLoopUnrollAndJam : FunctionPass<"affine-loop-unroll-jam"> {
 def AffineVectorize : FunctionPass<"affine-super-vectorize"> {
   let summary = "Vectorize to a target independent n-D vector abstraction";
   let constructor = "mlir::createSuperVectorizePass()";
+  let dependentDialects = ["vector::VectorDialect"];
   let options = [
     ListOption<"vectorSizes", "virtual-vector-size", "int64_t",
                "Specify an n-D virtual vector size for vectorization",
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index 04700f0aa17dbb..2f465f07a97e42 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -15,6 +15,7 @@
 #define MLIR_DIALECT_LLVMIR_LLVMDIALECT_H_
 
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Function.h"
 #include "mlir/IR/OpDefinition.h"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
index e824f97bc28544..226743587bd9d5 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td
@@ -19,6 +19,11 @@ include "mlir/IR/OpBase.td"
 def LLVM_Dialect : Dialect {
   let name = "llvm";
   let cppNamespace = "LLVM";
+
+  /// FIXME: at the moment this is a dependency of the translation to LLVM IR,
+  /// not really one of this dialect per se.
+  let dependentDialects = ["omp::OpenMPDialect"];
+
   let hasRegionArgAttrVerify = 1;
   let hasOperationAttrVerify = 1;
   let extraClassDeclaration = [{
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
index 86d437c9b561b7..9cc5314bdb901f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h
@@ -14,6 +14,7 @@
 #ifndef MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_
 #define MLIR_DIALECT_LLVMIR_NVVMDIALECT_H_
 
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 5f022e32b801d6..7d47e5012ac9a0 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -23,6 +23,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 def NVVM_Dialect : Dialect {
   let name = "nvvm";
   let cppNamespace = "NVVM";
+  let dependentDialects = ["LLVM::LLVMDialect"];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
index bf761c357f9074..eb40373c3f1171 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h
@@ -22,6 +22,7 @@
 #ifndef MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_
 #define MLIR_DIALECT_LLVMIR_ROCDLDIALECT_H_
 
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 0cd11690daa8ba..f85c4f02899b46 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -23,6 +23,7 @@ include "mlir/Interfaces/SideEffectInterfaces.td"
 def ROCDL_Dialect : Dialect {
   let name = "rocdl";
   let cppNamespace = "ROCDL";
+  let dependentDialects = ["LLVM::LLVMDialect"];
 }
 
 //===----------------------------------------------------------------------===//
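Analogously to the pass-level hook, the dialect-level dependentDialects above
should make loading the LLVM dialect pull in OpenMP automatically. A small
sketch of the assumed behavior (the assertion encodes the expectation of this
patch, not a documented guarantee):

    #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
    #include "mlir/IR/MLIRContext.h"
    #include <cassert>

    int main() {
      mlir::MLIRContext context(/*loadAllDialects=*/false);
      context.loadDialect<mlir::LLVM::LLVMDialect>();
      // OpenMP should come along as a declared dependency of the LLVM dialect.
      assert(context.getLoadedDialect<mlir::omp::OpenMPDialect>() != nullptr);
      return 0;
    }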
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index 11f12ad30eb6c0..dcf4b5ec06cb6f 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -30,17 +30,20 @@ def LinalgFusion : FunctionPass<"linalg-fusion"> {
 def LinalgFusionOfTensorOps : Pass<"linalg-fusion-for-tensor-ops"> {
   let summary = "Fuse operations on RankedTensorType in linalg dialect";
   let constructor = "mlir::createLinalgFusionOfTensorOpsPass()";
+  let dependentDialects = ["AffineDialect"];
 }
 
 def LinalgLowerToAffineLoops : FunctionPass<"convert-linalg-to-affine-loops"> {
   let summary = "Lower the operations from the linalg dialect into affine "
                 "loops";
   let constructor = "mlir::createConvertLinalgToAffineLoopsPass()";
+  let dependentDialects = ["AffineDialect"];
 }
 
 def LinalgLowerToLoops : FunctionPass<"convert-linalg-to-loops"> {
   let summary = "Lower the operations from the linalg dialect into loops";
   let constructor = "mlir::createConvertLinalgToLoopsPass()";
+  let dependentDialects = ["scf::SCFDialect", "AffineDialect"];
 }
 
 def LinalgOnTensorsToBuffers
     : Pass<"convert-linalg-on-tensors-to-buffers", "ModuleOp"> {
@@ -54,6 +57,7 @@ def LinalgLowerToParallelLoops
   let summary = "Lower the operations from the linalg dialect into parallel "
                 "loops";
   let constructor = "mlir::createConvertLinalgToParallelLoopsPass()";
+  let dependentDialects = ["AffineDialect", "scf::SCFDialect"];
 }
 
 def LinalgPromotion : FunctionPass<"linalg-promote-subviews"> {
@@ -70,6 +74,9 @@ def LinalgPromotion : FunctionPass<"linalg-promote-subviews"> {
 def LinalgTiling : FunctionPass<"linalg-tile"> {
   let summary = "Tile operations in the linalg dialect";
   let constructor = "mlir::createLinalgTilingPass()";
+  let dependentDialects = [
+    "AffineDialect", "scf::SCFDialect"
+  ];
   let options = [
     ListOption<"tileSizes", "linalg-tile-sizes", "int64_t",
                "Test generation of dynamic promoted buffers",
@@ -86,6 +93,7 @@ def LinalgTilingToParallelLoops
                "Test generation of dynamic promoted buffers",
                "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">
   ];
+  let dependentDialects = ["AffineDialect", "scf::SCFDialect"];
 }
 
 #endif // MLIR_DIALECT_LINALG_PASSES
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
index 483d0ba7c7be08..6f3cf0e1264235 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -36,6 +36,7 @@ def SCFParallelLoopTiling : FunctionPass<"parallel-loop-tiling"> {
                "Factors to tile parallel loops by",
                "llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated">
   ];
+  let dependentDialects = ["AffineDialect"];
 }
 
 #endif // MLIR_DIALECT_SCF_PASSES
diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h
index 4f9e4cb3618b65..8c0fef0d7ccf64 100644
--- a/mlir/include/mlir/IR/Dialect.h
+++ b/mlir/include/mlir/IR/Dialect.h
@@ -16,6 +16,8 @@
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/Support/TypeID.h"
 
+#include <map>
+
 namespace mlir {
 class DialectAsmParser;
 class DialectAsmPrinter;
@@ -23,7 +25,7 @@ class DialectInterface;
 class OpBuilder;
 class Type;
 
-using DialectAllocatorFunction = std::function<void(MLIRContext *)>;
+using DialectAllocatorFunction = std::function<Dialect *(MLIRContext *)>;
 
 /// Dialects are groups of MLIR operations and behavior associated with the
 /// entire group. For example, hooks into other systems for constant folding,
@@ -212,30 +214,87 @@ class Dialect {
   /// A collection of registered dialect interfaces.
   DenseMap<TypeID, std::unique_ptr<DialectInterface>> registeredInterfaces;
 
-  /// Registers a specific dialect creation function with the global registry.
-  /// Used through the registerDialect template.
-  /// Registrations are deduplicated by dialect TypeID and only the first
-  /// registration will be used.
-  static void
-  registerDialectAllocator(TypeID typeID,
                           const DialectAllocatorFunction &function);
-
   template <typename ConcreteDialect> friend void registerDialect();
   friend class MLIRContext;
 };
 
-/// Registers all dialects and hooks from the global registries with the
-/// specified MLIRContext.
+/// The DialectRegistry maps a dialect namespace to a constructor for the
+/// matching dialect.
+/// This allows for decoupling the list of dialects "available" from the
+/// dialects loaded in the Context. The parser in particular will lazily load
+/// dialects in the Context as operations are encountered.
+class DialectRegistry {
+  using MapTy =
+      std::map<std::string, std::pair<TypeID, DialectAllocatorFunction>>;
+
+public:
+  template <typename ConcreteDialect>
+  void insert() {
+    insert(TypeID::get<ConcreteDialect>(),
+           ConcreteDialect::getDialectNamespace(),
+           static_cast<DialectAllocatorFunction>(([](MLIRContext *ctx) {
+             // Just allocate the dialect, the context
+             // takes ownership of it.
+             return ctx->getOrLoadDialect<ConcreteDialect>();
+           })));
+  }
+
+  template <typename ConcreteDialect, typename OtherDialect,
+            typename... MoreDialects>
+  void insert() {
+    insert<ConcreteDialect>();
+    insert<OtherDialect, MoreDialects...>();
+  }
+
+  /// Add a new dialect constructor to the registry.
+  void insert(TypeID typeID, StringRef name, DialectAllocatorFunction ctor);
+
+  /// Load a dialect for this namespace in the provided context.
+  Dialect *loadByName(StringRef name, MLIRContext *context);
+
+  // Register all dialects available in the current registry into the provided
+  // destination registry.
+  void appendTo(DialectRegistry &destination) {
+    for (const auto &nameAndRegistrationIt : registry)
+      destination.insert(nameAndRegistrationIt.second.first,
+                         nameAndRegistrationIt.first,
+                         nameAndRegistrationIt.second.second);
+  }
+  // Load all dialects available in the registry in the provided context.
+  void loadAll(MLIRContext *context) {
+    for (const auto &nameAndRegistrationIt : registry)
+      nameAndRegistrationIt.second.second(context);
+  }
+
+  MapTy::const_iterator begin() const { return registry.begin(); }
+  MapTy::const_iterator end() const { return registry.end(); }
+
+private:
+  MapTy registry;
+};
+
+/// Deprecated: this provides a global registry for convenience, while we're
+/// transitioning the registration mechanism to a stateless approach.
+DialectRegistry &getGlobalDialectRegistry();
+
+/// Registers all dialects from the global registries with the
+/// specified MLIRContext. This won't load the dialects in the context,
+/// but only make them available for lazy loading by name.
 /// Note: This method is not thread-safe.
 void registerAllDialects(MLIRContext *context);
 
+/// Register and return the dialect with the given namespace in the provided
+/// context. Returns nullptr if there is no constructor registered for this
+/// dialect.
+inline Dialect *registerDialect(StringRef name, MLIRContext *context) {
+  return getGlobalDialectRegistry().loadByName(name, context);
+}
+
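Putting the registry and the context together, a minimal usage sketch based on
the APIs introduced in this patch (the dialect choice is illustrative):

    #include "mlir/Dialect/StandardOps/IR/Ops.h"
    #include "mlir/IR/Dialect.h"
    #include "mlir/IR/MLIRContext.h"

    int main() {
      // Inserting only records a constructor; nothing is built yet.
      mlir::DialectRegistry registry;
      registry.insert<mlir::StandardOpsDialect>();

      mlir::MLIRContext context(/*loadAllDialects=*/false);
      registry.appendTo(context.getDialectRegistry());

      // Loading happens on demand, e.g. by namespace when parsing hits an op.
      mlir::Dialect *dialect = context.getOrLoadDialect("std");
      return dialect ? 0 : 1;
    }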
 /// Utility to register a dialect. Client can register their dialect with the
 /// global registry by calling registerDialect<MyDialect>();
 /// Note: This method is not thread-safe.
 template <typename ConcreteDialect> void registerDialect() {
-  Dialect::registerDialectAllocator(
-      TypeID::get<ConcreteDialect>(),
-      [](MLIRContext *ctx) { ctx->getOrCreateDialect<ConcreteDialect>(); });
+  getGlobalDialectRegistry().insert<ConcreteDialect>();
 }
 
 /// DialectRegistration provides a global initializer that registers a Dialect
diff --git a/mlir/include/mlir/IR/FunctionSupport.h b/mlir/include/mlir/IR/FunctionSupport.h
index 7e281f393af946..3d467cd4f3642f 100644
--- a/mlir/include/mlir/IR/FunctionSupport.h
+++ b/mlir/include/mlir/IR/FunctionSupport.h
@@ -428,7 +428,7 @@ LogicalResult FunctionLike<ConcreteType>::verifyTrait(Operation *op) {
       if (!attr.first.strref().contains('.'))
         return funcOp.emitOpError("arguments may only have dialect attributes");
       auto dialectNamePair = attr.first.strref().split('.');
-      if (auto *dialect = ctx->getRegisteredDialect(dialectNamePair.first)) {
+      if (auto *dialect = ctx->getLoadedDialect(dialectNamePair.first)) {
         if (failed(dialect->verifyRegionArgAttribute(op, /*regionIndex=*/0,
                                                      /*argIndex=*/i, attr)))
           return failure();
@@ -444,7 +444,7 @@ LogicalResult FunctionLike<ConcreteType>::verifyTrait(Operation *op) {
       if (!attr.first.strref().contains('.'))
         return funcOp.emitOpError("results may only have dialect attributes");
       auto dialectNamePair = attr.first.strref().split('.');
-      if (auto *dialect = ctx->getRegisteredDialect(dialectNamePair.first)) {
+      if (auto *dialect = ctx->getLoadedDialect(dialectNamePair.first)) {
         if (failed(dialect->verifyRegionResultAttribute(op, /*regionIndex=*/0,
                                                         /*resultIndex=*/i,
                                                         attr)))
diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h
index 0192a8ae06af87..e8a5d6e6d2368b 100644
--- a/mlir/include/mlir/IR/MLIRContext.h
+++ b/mlir/include/mlir/IR/MLIRContext.h
@@ -19,10 +19,12 @@ namespace mlir {
 class AbstractOperation;
 class DiagnosticEngine;
 class Dialect;
+class DialectRegistry;
 class InFlightDiagnostic;
 class Location;
 class MLIRContextImpl;
 class StorageUniquer;
+DialectRegistry &getGlobalDialectRegistry();
 
 /// MLIRContext is the top-level object for a collection of MLIR modules. It
 /// holds immortal uniqued objects like types, and the tables used to unique
@@ -34,34 +36,69 @@ class StorageUniquer;
 ///
 class MLIRContext {
 public:
-  explicit MLIRContext();
+  /// Create a new Context.
+  /// The loadAllDialects parameter allows loading all dialects from the
+  /// global registry on Context construction. It is deprecated and will be
+  /// removed soon.
+  explicit MLIRContext(bool loadAllDialects = true);
   ~MLIRContext();
 
-  /// Return information about all registered IR dialects.
-  std::vector<Dialect *> getRegisteredDialects();
+  /// Return information about all IR dialects loaded in the context.
+  std::vector<Dialect *> getLoadedDialects();
+
+  /// Return the dialect registry associated with this context.
+  DialectRegistry &getDialectRegistry();
+
+  /// Return information about all available dialects in the registry in this
+  /// context.
+  std::vector<StringRef> getAvailableDialects();
 
   /// Get a registered IR dialect with the given namespace. If an exact match is
   /// not found, then return nullptr.
-  Dialect *getRegisteredDialect(StringRef name);
+  Dialect *getLoadedDialect(StringRef name);
 
   /// Get a registered IR dialect for the given derived dialect type. The
   /// derived type must provide a static 'getDialectNamespace' method.
-  template <typename T> T *getRegisteredDialect() {
-    return static_cast<T *>(getRegisteredDialect(T::getDialectNamespace()));
+  template <typename T>
+  T *getLoadedDialect() {
+    return static_cast<T *>(getLoadedDialect(T::getDialectNamespace()));
   }
 
   /// Get (or create) a dialect for the given derived dialect type. The derived
   /// type must provide a static 'getDialectNamespace' method.
   template <typename T>
-  T *getOrCreateDialect() {
-    return static_cast<T *>(getOrCreateDialect(
-        T::getDialectNamespace(), TypeID::get<T>(), [this]() {
+  T *getOrLoadDialect() {
+    return static_cast<T *>(
+        getOrLoadDialect(T::getDialectNamespace(), TypeID::get<T>(), [this]() {
           std::unique_ptr<T> dialect(new T(this));
-          dialect->dialectID = TypeID::get<T>();
           return dialect;
         }));
   }
 
+  /// Load a dialect in the context.
+  template <typename Dialect>
+  void loadDialect() {
+    getOrLoadDialect<Dialect>();
+  }
+
+  /// Load a list of dialects in the context.
+  template <typename Dialect, typename OtherDialect, typename... MoreDialects>
+  void loadDialect() {
+    getOrLoadDialect<Dialect>();
+    loadDialect<OtherDialect, MoreDialects...>();
+  }
+
+  /// Deprecated: load all globally registered dialects into this context.
+  /// This method will be removed soon, it can be used temporarily as we're
+  /// phasing out the global registry.
+  void loadAllGloballyRegisteredDialects();
+
+  /// Get (or create) a dialect for the given derived dialect name.
+  /// The dialect will be loaded from the registry if no dialect is found.
+  /// If no dialect is loaded for this name and none is available in the
+  /// registry, returns nullptr.
+  Dialect *getOrLoadDialect(StringRef name);
+
   /// Return true if we allow to create operation for unregistered dialects.
   bool allowsUnregisteredDialects();
 
@@ -123,10 +160,12 @@ class MLIRContext {
   const std::unique_ptr<MLIRContextImpl> impl;
 
   /// Get a dialect for the provided namespace and TypeID: abort the program if
-  /// a dialect exist for this namespace with different TypeID. Returns a
-  /// pointer to the dialect owned by the context.
-  Dialect *getOrCreateDialect(StringRef dialectNamespace, TypeID dialectID,
-                              function_ref<std::unique_ptr<Dialect>()> ctor);
+  /// a dialect exists for this namespace with a different TypeID. If a dialect
+  /// has not been loaded for this namespace/TypeID yet, use the provided ctor
+  /// to create one on the fly and load it. Returns a pointer to the dialect
+  /// owned by the context.
+  Dialect *getOrLoadDialect(StringRef dialectNamespace, TypeID dialectID,
+                            function_ref<std::unique_ptr<Dialect>()> ctor);
 
   MLIRContext(const MLIRContext &) = delete;
   void operator=(const MLIRContext &) = delete;
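For clients that want dialects loaded eagerly rather than lazily, the variadic
loadDialect<> above offers the non-deprecated path. A short sketch (the dialect
set is chosen purely for illustration):

    #include "mlir/Dialect/StandardOps/IR/Ops.h"
    #include "mlir/Dialect/Vector/VectorOps.h"
    #include "mlir/IR/MLIRContext.h"

    int main() {
      mlir::MLIRContext context(/*loadAllDialects=*/false);
      // Expands to one getOrLoadDialect<>() call per listed dialect.
      context.loadDialect<mlir::StandardOpsDialect,
                          mlir::vector::VectorDialect>();
      return 0;
    }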
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 9cc57a61728949..a28410f028d5f0 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -244,6 +244,11 @@ class Dialect {
   // The description of the dialect.
   string description = ?;
 
+  // A list of dialects this dialect will load on construction as dependencies.
+  // These are dialects that this dialect may be involved with in
+  // canonicalization patterns or interfaces.
+  list<string> dependentDialects = [];
+
   // The C++ namespace that ops of this dialect should be placed into.
   //
   // By default, uses the name of the dialect as the only namespace. To avoid
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index b76b26fe348346..147ececc4c5a96 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -35,30 +35,35 @@
 
 namespace mlir {
 
+// Add all the MLIR dialects to the provided registry.
+inline void registerAllDialects(DialectRegistry &registry) {
+  // clang-format off
+  registry.insert();
+  // clang-format on
+}
+
 // This function should be called before creating any MLIRContext if one expect
 // all the possible dialects to be made available to the context automatically.
 inline void registerAllDialects() {
-  static bool init_once = []() {
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    registerDialect();
-    return true;
-  }();
-  (void)init_once;
+  static bool initOnce =
+      ([]() { registerAllDialects(getGlobalDialectRegistry()); }(), true);
+  (void)initOnce;
 }
 
 } // namespace mlir
diff --git a/mlir/include/mlir/InitAllTranslations.h b/mlir/include/mlir/InitAllTranslations.h
index 31ca0254cf8999..a1771dab144c04 100644
--- a/mlir/include/mlir/InitAllTranslations.h
+++ b/mlir/include/mlir/InitAllTranslations.h
@@ -28,7 +28,7 @@ void registerAVX512ToLLVMIRTranslation();
 // expects all the possible translations to be made available to the context
 // automatically.
 inline void registerAllTranslations() {
-  static bool init_once = []() {
+  static bool initOnce = []() {
     registerFromLLVMIRTranslation();
     registerFromSPIRVTranslation();
     registerToLLVMIRTranslation();
@@ -38,7 +38,7 @@ inline void registerAllTranslations() {
     registerAVX512ToLLVMIRTranslation();
     return true;
   }();
-  (void)init_once;
+  (void)initOnce;
 }
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h
index 8de31d9443190d..cd4c06acd070b4 100644
--- a/mlir/include/mlir/Pass/Pass.h
+++ b/mlir/include/mlir/Pass/Pass.h
@@ -9,6 +9,7 @@
 #ifndef MLIR_PASS_PASS_H
 #define MLIR_PASS_PASS_H
 
+#include "mlir/IR/Dialect.h"
 #include "mlir/IR/Function.h"
 #include "mlir/Pass/AnalysisManager.h"
 #include "mlir/Pass/PassRegistry.h"
@@ -57,6 +58,13 @@ class Pass {
   /// Returns the derived pass name.
   virtual StringRef getName() const = 0;
 
+  /// Register dependent dialects for the current pass.
+  /// A pass is expected to register the dialects it will create entities for
+  /// (Operations, Types, Attributes), other than dialects that already exist
+  /// in the input. For example, a pass that converts from Linalg to Affine
+  /// would register the Affine dialect but does not need to register Linalg.
+  virtual void getDependentDialects(DialectRegistry &registry) const {}
+
   /// Returns the command line argument used when registering this pass. Return
   /// an empty string if one does not exist.
   virtual StringRef getArgument() const {
diff --git a/mlir/include/mlir/Pass/PassBase.td b/mlir/include/mlir/Pass/PassBase.td
index 54b44031559e72..7a2feff4fe0454 100644
--- a/mlir/include/mlir/Pass/PassBase.td
+++ b/mlir/include/mlir/Pass/PassBase.td
@@ -78,6 +78,9 @@ class PassBase<string passArg, string base> {
   // A C++ constructor call to create an instance of this pass.
   code constructor = [{}];
 
+  // A list of dialects this pass may produce entities in.
+  list<string> dependentDialects = [];
+
   // A set of options provided by this pass.
   list